From 51b26ada79b605ed709ddcedbb6012e8f8e0ebed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Apr 2009 10:12:47 -0700 Subject: x86: unify arch/x86/boot/compressed/vmlinux_*.lds Look at the: diff -u arch/x86/boot/compressed/vmlinux_*.lds output and realize that they're basically exactly the same except for trivial naming differences, and the fact that the 64-bit version has a "pgtable" thing. So unify them. There's some trivial cleanup there (make the output format a Kconfig thing rather than doing #ifdef's for it, and unify both 32-bit and 64-bit BSS end to "_ebss", where 32-bit used to use the traditional "_end"), but other than that it's really a very mindless and straight conversion. For example, I think we should aim to remove "startup_32" vs "startup_64", and just call it "startup", and get rid of one more difference. I didn't do that. Also, notice the comment in the unified vmlinux.lds.S talks about "head_64" and "startup_32" which is an odd and incorrect mix, but that was actually what the old 64-bit only lds file had, so the confusion isn't new, and now that mixing is arguably more accurate thanks to the vmlinux.lds.S file being shared between the two cases ;) [ Impact: cleanup, unification ] Signed-off-by: Linus Torvalds Acked-by: Sam Ravnborg Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_32.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3a8a866fb2e..85bd3285706 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -88,9 +88,9 @@ ENTRY(startup_32) * where decompression in place becomes safe. */ pushl %esi - leal _end(%ebp), %esi - leal _end(%ebx), %edi - movl $(_end - startup_32), %ecx + leal _ebss(%ebp), %esi + leal _ebss(%ebx), %edi + movl $(_ebss - startup_32), %ecx std rep movsb @@ -121,7 +121,7 @@ relocated: */ xorl %eax,%eax leal _edata(%ebx),%edi - leal _end(%ebx), %ecx + leal _ebss(%ebx), %ecx subl %edi,%ecx cld rep -- cgit v1.2.3-70-g09d2 From bd2a36984c50bb546a7d04cb395fddcf98a1092c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 5 May 2009 23:24:50 -0700 Subject: x86, boot: use BP_scratch in arch/x86/boot/compressed/head_*.S Use the BP_scratch symbol from asm-offsets.h instead of hard-coding the location. [ Impact: cleanup ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 2 +- arch/x86/boot/compressed/head_64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 85bd3285706..e3398f3d1b3 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -53,7 +53,7 @@ ENTRY(startup_32) * data at 0x1e4 (defined as a scratch field) are used as the stack * for this calculation. Only 4 bytes are needed. */ - leal (0x1e4+4)(%esi), %esp + leal (BP_scratch+4)(%esi), %esp call 1f 1: popl %ebp subl $1b, %ebp diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index ed4a8294800..06cc7e59352 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -56,7 +56,7 @@ ENTRY(startup_32) * data at 0x1e4 (defined as a scratch field) are used as the stack * for this calculation. Only 4 bytes are needed.
*/ - leal (0x1e4+4)(%esi), %esp + leal (BP_scratch+4)(%esi), %esp call 1f 1: popl %ebp subl $1b, %ebp -- cgit v1.2.3-70-g09d2 From 5f64ec64e7f9b246c0a94f34cdf7782f98a6e55d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 15:45:17 -0700 Subject: x86, boot: stylistic cleanups for boot/compressed/head_32.S Reformat arch/x86/boot/compressed/head_32.S to be closer to currently preferred kernel assembly style, that is: - opcode and operand separated by tab - operands separated by ", " - C-style comments This also makes it more similar to head_64.S. [ Impact: cleanup, no object code change ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 170 +++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 81 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index e3398f3d1b3..7bd7766ffab 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -12,16 +12,16 @@ * the page directory. [According to comments etc elsewhere on a compressed * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] * - * Page 0 is deliberately kept safe, since System Management Mode code in + * Page 0 is deliberately kept safe, since System Management Mode code in * laptops may need to access the BIOS data stored there. This is also - * useful for future device drivers that either access the BIOS via VM86 + * useful for future device drivers that either access the BIOS via VM86 * mode. */ /* * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ -.text + .text #include #include @@ -29,75 +29,80 @@ #include #include -.section ".text.head","ax",@progbits + .section ".text.head","ax",@progbits ENTRY(startup_32) cld - /* test KEEP_SEGMENTS flag to see if the bootloader is asking - * us to not reload segments */ - testb $(1<<6), BP_loadflags(%esi) - jnz 1f + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ + testb $(1<<6), BP_loadflags(%esi) + jnz 1f cli - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - movl %eax,%ss + movl $__BOOT_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss 1: -/* Calculate the delta between where we were compiled to run +/* + * Calculate the delta between where we were compiled to run * at and where we were actually loaded at. This can only be done * with a short local call on x86. Nothing else will tell us what * address we are running at. The reserved chunk of the real-mode * data at 0x1e4 (defined as a scratch field) are used as the stack * for this calculation. Only 4 bytes are needed. */ - leal (BP_scratch+4)(%esi), %esp - call 1f -1: popl %ebp - subl $1b, %ebp + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp -/* %ebp contains the address we are loaded at by the boot loader and %ebx +/* + * %ebp contains the address we are loaded at by the boot loader and %ebx * contains the address where we should move the kernel image temporarily * for safe in-place decompression. 
*/ #ifdef CONFIG_RELOCATABLE - movl %ebp, %ebx + movl %ebp, %ebx addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx #else - movl $LOAD_PHYSICAL_ADDR, %ebx + movl $LOAD_PHYSICAL_ADDR, %ebx #endif /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx + subl input_len(%ebp), %ebx + movl output_len(%ebp), %eax + addl %eax, %ebx /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx + shrl $12, %eax + addl %eax, %ebx /* Add 32K + 18 bytes of extra slack */ - addl $(32768 + 18), %ebx + addl $(32768 + 18), %ebx /* Align on a 4K boundary */ - addl $4095, %ebx - andl $~4095, %ebx + addl $4095, %ebx + andl $~4095, %ebx -/* Copy the compressed kernel to the end of our buffer +/* + * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushl %esi - leal _ebss(%ebp), %esi - leal _ebss(%ebx), %edi - movl $(_ebss - startup_32), %ecx + pushl %esi + leal _ebss(%ebp), %esi + leal _ebss(%ebx), %edi + movl $(_ebss - startup_32), %ecx std - rep - movsb + rep movsb cld - popl %esi + popl %esi -/* Compute the kernel start address. +/* + * Compute the kernel start address. */ #ifdef CONFIG_RELOCATABLE addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp @@ -109,81 +114,84 @@ ENTRY(startup_32) /* * Jump to the relocated address. */ - leal relocated(%ebx), %eax - jmp *%eax + leal relocated(%ebx), %eax + jmp *%eax ENDPROC(startup_32) -.section ".text" + .text relocated: /* * Clear BSS */ - xorl %eax,%eax - leal _edata(%ebx),%edi - leal _ebss(%ebx), %ecx - subl %edi,%ecx + xorl %eax, %eax + leal _edata(%ebx), %edi + leal _ebss(%ebx), %ecx + subl %edi, %ecx cld - rep - stosb + rep stosb /* * Setup the stack for the decompressor */ - leal boot_stack_end(%ebx), %esp + leal boot_stack_end(%ebx), %esp /* * Do the decompression, and jump to the new kernel.. */ - movl output_len(%ebx), %eax - pushl %eax - # push arguments for decompress_kernel: - pushl %ebp # output address - movl input_len(%ebx), %eax - pushl %eax # input_len - leal input_data(%ebx), %eax - pushl %eax # input_data - leal boot_heap(%ebx), %eax - pushl %eax # heap area - pushl %esi # real mode pointer - call decompress_kernel - addl $20, %esp - popl %ecx + movl output_len(%ebx), %eax + pushl %eax + /* push arguments for decompress_kernel: */ + pushl %ebp /* output address */ + movl input_len(%ebx), %eax + pushl %eax /* input_len */ + leal input_data(%ebx), %eax + pushl %eax /* input_data */ + leal boot_heap(%ebx), %eax + pushl %eax /* heap area */ + pushl %esi /* real mode pointer */ + call decompress_kernel + addl $20, %esp + popl %ecx #if CONFIG_RELOCATABLE -/* Find the address of the relocations. +/* + * Find the address of the relocations. */ - movl %ebp, %edi - addl %ecx, %edi + movl %ebp, %edi + addl %ecx, %edi -/* Calculate the delta between where vmlinux was compiled to run +/* + * Calculate the delta between where vmlinux was compiled to run * and where it was actually loaded. */ - movl %ebp, %ebx - subl $LOAD_PHYSICAL_ADDR, %ebx - jz 2f /* Nothing to be done if loaded at compiled addr. */ + movl %ebp, %ebx + subl $LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ /* * Process relocations. 
*/ -1: subl $4, %edi - movl 0(%edi), %ecx - testl %ecx, %ecx - jz 2f - addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) - jmp 1b +1: subl $4, %edi + movl (%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b 2: #endif /* * Jump to the decompressed kernel. */ - xorl %ebx,%ebx - jmp *%ebp + xorl %ebx, %ebx + jmp *%ebp -.bss -/* Stack and heap for uncompression */ -.balign 4 +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 boot_heap: .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: -- cgit v1.2.3-70-g09d2 From 5b11f1cee5797b38d16b94d8745b12b6727a8373 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 16:20:34 -0700 Subject: x86, boot: straighten out ranges to copy/zero in compressed/head*.S Both on 32 and 64 bits, we copy all the way up to the end of bss, except that on 64 bits there is a hack to avoid copying on top of the page tables. There is no point in copying bss at all, especially since we are just about to zero it all anyway. To clean up and unify the handling, we now do: - copy from startup_32 to _bss. - zero from _bss to _ebss. - the _ebss symbol is aligned to an 8-byte boundary. - the page tables are moved to a separate section. Use _bss as the copy endpoint since _edata may be misaligned. [ Impact: cleanup, trivial performance improvement ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 8 ++++---- arch/x86/boot/compressed/head_64.S | 18 +++++++++++++----- arch/x86/boot/compressed/vmlinux.lds.S | 19 ++++++++++++------- 3 files changed, 29 insertions(+), 16 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 7bd7766ffab..59425e157df 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -93,9 +93,9 @@ ENTRY(startup_32) * where decompression in place becomes safe. */ pushl %esi - leal _ebss(%ebp), %esi - leal _ebss(%ebx), %edi - movl $(_ebss - startup_32), %ecx + leal _bss(%ebp), %esi + leal _bss(%ebx), %edi + movl $(_bss - startup_32), %ecx std rep movsb cld @@ -125,7 +125,7 @@ relocated: * Clear BSS */ xorl %eax, %eax - leal _edata(%ebx), %edi + leal _bss(%ebx), %edi leal _ebss(%ebx), %ecx subl %edi, %ecx cld diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 26c3def43ac..5bc9052615b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -253,9 +253,9 @@ ENTRY(startup_64) * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. 
*/ - leaq _end_before_pgt(%rip), %r8 - leaq _end_before_pgt(%rbx), %r9 - movq $_end_before_pgt /* - $startup_32 */, %rcx + leaq _bss(%rip), %r8 + leaq _bss(%rbx), %r9 + movq $_bss /* - $startup_32 */, %rcx 1: subq $8, %r8 subq $8, %r9 movq 0(%r8), %rax @@ -276,8 +276,8 @@ relocated: * Clear BSS */ xorq %rax, %rax - leaq _edata(%rbx), %rdi - leaq _end_before_pgt(%rbx), %rcx + leaq _bss(%rbx), %rdi + leaq _ebss(%rbx), %rcx subq %rdi, %rcx cld rep stosb @@ -329,3 +329,11 @@ boot_heap: boot_stack: .fill BOOT_STACK_SIZE, 1, 0 boot_stack_end: + +/* + * Space for page tables (not in .bss so not zeroed) + */ + .section ".pgtable","a",@nobits + .balign 4096 +pgtable: + .fill 6*4096, 1, 0 diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index dbe515e13fe..cc353e1b3ff 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -2,6 +2,8 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) #undef i386 +#include + #ifdef CONFIG_X86_64 OUTPUT_ARCH(i386:x86-64) ENTRY(startup_64) @@ -48,13 +50,16 @@ SECTIONS *(.bss) *(.bss.*) *(COMMON) -#ifdef CONFIG_X86_64 - . = ALIGN(8); - _end_before_pgt = . ; - . = ALIGN(4096); - pgtable = . ; - . = . + 4096 * 6; -#endif + . = ALIGN(8); /* For convenience during zeroing */ _ebss = .; } +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + .pgtable : { + _pgtable = . ; + *(.pgtable) + _epgtable = . ; + } +#endif + _end = .; } -- cgit v1.2.3-70-g09d2 From 0a137736704ef9af719409933b3c33e138461786 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 16:27:41 -0700 Subject: x86, boot: set up the decompression stack as early as possible Set up the decompression stack as soon as we know where it needs to go. That way we have a full-service stack as soon as possible, rather than relying on the BP_scratch field. Note that the stack does need to be empty during bss zeroing (or else the stack needs to be moved out of the bss segment, which is also an option.) [ Impact: cleanup, minor paranoia ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 10 ++++------ arch/x86/boot/compressed/head_64.S | 16 ++++++++-------- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 59425e157df..d7245cf8026 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -88,6 +88,9 @@ ENTRY(startup_32) addl $4095, %ebx andl $~4095, %ebx + /* Set up the stack */ + leal boot_stack_end(%ebx), %esp + /* * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. @@ -122,7 +125,7 @@ ENDPROC(startup_32) relocated: /* - * Clear BSS + * Clear BSS (stack is currently empty) */ xorl %eax, %eax leal _bss(%ebx), %edi @@ -131,11 +134,6 @@ relocated: cld rep stosb -/* - * Setup the stack for the decompressor - */ - leal boot_stack_end(%ebx), %esp - /* * Do the decompression, and jump to the new kernel.. 
*/ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 5bc9052615b..a0b18426069 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -249,6 +249,13 @@ ENTRY(startup_64) addq $(32768 + 18 + 4095), %rbx andq $~4095, %rbx + /* Set up the stack */ + leaq boot_stack_end(%rbx), %rsp + + /* Zero EFLAGS */ + pushq $0 + popfq + /* * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. @@ -273,7 +280,7 @@ ENTRY(startup_64) relocated: /* - * Clear BSS + * Clear BSS (stack is currently empty) */ xorq %rax, %rax leaq _bss(%rbx), %rdi @@ -282,13 +289,6 @@ relocated: cld rep stosb - /* Setup the stack */ - leaq boot_stack_end(%rip), %rsp - - /* zero EFLAGS after setting rsp */ - pushq $0 - popfq - /* * Do the decompression, and jump to the new kernel.. */ -- cgit v1.2.3-70-g09d2 From 97541912785369925723b6255438ad9fce2ddf04 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 6 May 2009 17:56:51 -0700 Subject: x86, boot: zero EFLAGS on 32 bits The 64-bit code already clears EFLAGS as soon as it has a stack. This seems like a reasonable precaution, so do it on 32 bits as well. [ Impact: extra paranoia ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index d7245cf8026..d02a4f02be1 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -91,6 +91,10 @@ ENTRY(startup_32) /* Set up the stack */ leal boot_stack_end(%ebx), %esp + /* Zero EFLAGS */ + pushl $0 + popfl + /* * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. -- cgit v1.2.3-70-g09d2 From 36d3793c947f1ef7ba3d24eeeddc1be41adc5ab4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 16:45:15 -0700 Subject: x86, boot: use appropriate rep string for move and clear In the pre-decompression code, use the appropriate largest possible rep movs and rep stos to move code and clear bss, respectively. For reverse copy, do note that the initial values are supposed to be the address of the first (highest) copy datum, not one byte beyond the end of the buffer. rep strings are not necessarily the fastest way to perform these operations on all current processors, but are likely to be in the future, and perhaps more importantly, we want to encourage the architecturally right thing to do here. This also fixes a couple of trivial inefficiencies on 64 bits. [ Impact: trivial performance enhancement, increase code similarity ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 11 ++++++----- arch/x86/boot/compressed/head_64.S | 26 +++++++++++++------------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index d02a4f02be1..6710dc78ac5 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -100,11 +100,12 @@ ENTRY(startup_32) * where decompression in place becomes safe. 
*/ pushl %esi - leal _bss(%ebp), %esi - leal _bss(%ebx), %edi + leal (_bss-4)(%ebp), %esi + leal (_bss-4)(%ebx), %edi movl $(_bss - startup_32), %ecx + shrl $2, %ecx std - rep movsb + rep movsl cld popl %esi @@ -135,8 +136,8 @@ relocated: leal _bss(%ebx), %edi leal _ebss(%ebx), %ecx subl %edi, %ecx - cld - rep stosb + shrl $2, %ecx + rep stosl /* * Do the decompression, and jump to the new kernel.. diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index a0b18426069..723c72dfd7b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -260,15 +260,15 @@ ENTRY(startup_64) * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - leaq _bss(%rip), %r8 - leaq _bss(%rbx), %r9 + pushq %rsi + leaq (_bss-8)(%rip), %rsi + leaq (_bss-8)(%rbx), %rdi movq $_bss /* - $startup_32 */, %rcx -1: subq $8, %r8 - subq $8, %r9 - movq 0(%r8), %rax - movq %rax, 0(%r9) - subq $8, %rcx - jnz 1b + shrq $3, %rcx + std + rep movsq + cld + popq %rsi /* * Jump to the relocated address. @@ -282,12 +282,12 @@ relocated: /* * Clear BSS (stack is currently empty) */ - xorq %rax, %rax - leaq _bss(%rbx), %rdi - leaq _ebss(%rbx), %rcx + xorl %eax, %eax + leaq _bss(%rip), %rdi + leaq _ebss(%rip), %rcx subq %rdi, %rcx - cld - rep stosb + shrq $3, %rcx + rep stosq /* * Do the decompression, and jump to the new kernel.. -- cgit v1.2.3-70-g09d2 From 02a884c0fe7ec8459d00d34b7d4101af21fc4a86 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 17:42:16 -0700 Subject: x86, boot: determine compressed code offset at compile time Determine the compressed code offset (from the kernel runtime address) at compile time. This allows some minor optimizations in arch/x86/boot/compressed/head_*.S, but more importantly it makes this value available to the build process, which will enable a future patch to export the necessary linear memory footprint into the bzImage header. [ Impact: cleanup, future patch enabling ] Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/compressed/Makefile | 11 ++-- arch/x86/boot/compressed/head_32.S | 24 ++------- arch/x86/boot/compressed/head_64.S | 41 +++++---------- arch/x86/boot/compressed/mkpiggy.c | 97 ++++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/vmlinux.scr | 10 ---- 5 files changed, 123 insertions(+), 60 deletions(-) create mode 100644 arch/x86/boot/compressed/mkpiggy.c delete mode 100644 arch/x86/boot/compressed/vmlinux.scr (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 7f24fdb584e..49c8a4c37d7 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -19,6 +19,8 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ LDFLAGS := -m elf_$(UTS_MACHINE) LDFLAGS_vmlinux := -T +hostprogs-y := mkpiggy + $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: @@ -50,6 +52,9 @@ suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 suffix-$(CONFIG_KERNEL_LZMA) := lzma -LDFLAGS_piggy.o := -r --format binary --oformat $(CONFIG_OUTPUT_FORMAT) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE - $(call if_changed,ld) +quiet_cmd_mkpiggy = MKPIGGY $@ + cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) + +targets += piggy.S +$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE + $(call if_changed,mkpiggy) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 6710dc78ac5..470474bafc4 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -75,18 +75,8 @@ ENTRY(startup_32) movl $LOAD_PHYSICAL_ADDR, %ebx #endif - /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx - /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx - /* Add 32K + 18 bytes of extra slack */ - addl $(32768 + 18), %ebx - /* Align on a 4K boundary */ - addl $4095, %ebx - andl $~4095, %ebx + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx /* Set up the stack */ leal boot_stack_end(%ebx), %esp @@ -142,12 +132,10 @@ relocated: /* * Do the decompression, and jump to the new kernel.. */ - movl output_len(%ebx), %eax - pushl %eax + leal z_extract_offset_negative(%ebx), %ebp /* push arguments for decompress_kernel: */ pushl %ebp /* output address */ - movl input_len(%ebx), %eax - pushl %eax /* input_len */ + pushl $z_input_len /* input_len */ leal input_data(%ebx), %eax pushl %eax /* input_data */ leal boot_heap(%ebx), %eax @@ -155,14 +143,12 @@ relocated: pushl %esi /* real mode pointer */ call decompress_kernel addl $20, %esp - popl %ecx #if CONFIG_RELOCATABLE /* * Find the address of the relocations. 
*/ - movl %ebp, %edi - addl %ecx, %edi + leal z_output_len(%ebp), %edi /* * Calculate the delta between where vmlinux was compiled to run diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 723c72dfd7b..2b9f2510507 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -90,16 +90,8 @@ ENTRY(startup_32) movl $CONFIG_PHYSICAL_START, %ebx #endif - /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx - /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx - /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ - addl $(32768 + 18 + 4095), %ebx - andl $~4095, %ebx + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx /* * Prepare for entering 64 bit mode @@ -224,6 +216,9 @@ ENTRY(startup_64) * If it is a relocatable kernel then decompress and run the kernel * from load address aligned to 2MB addr, otherwise decompress and * run the kernel from CONFIG_PHYSICAL_START + * + * We cannot rely on the calculation done in 32-bit mode, since we + * may have been invoked via the 64-bit entry point. */ /* Start with the delta to where the kernel will run at. */ @@ -237,17 +232,8 @@ ENTRY(startup_64) movq %rbp, %rbx #endif - /* Replace the compressed data size with the uncompressed size */ - movl input_len(%rip), %eax - subq %rax, %rbx - movl output_len(%rip), %eax - addq %rax, %rbx - /* Add 8 bytes for every 32K input block */ - shrq $12, %rax - addq %rax, %rbx - /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ - addq $(32768 + 18 + 4095), %rbx - andq $~4095, %rbx + /* Target address to relocate to for decompression */ + leaq z_extract_offset(%rbp), %rbx /* Set up the stack */ leaq boot_stack_end(%rbx), %rsp @@ -292,13 +278,12 @@ relocated: /* * Do the decompression, and jump to the new kernel.. */ - pushq %rsi # Save the real mode argument - movq %rsi, %rdi # real mode address - leaq boot_heap(%rip), %rsi # malloc area for uncompression - leaq input_data(%rip), %rdx # input_data - movl input_len(%rip), %eax - movq %rax, %rcx # input_len - movq %rbp, %r8 # output + pushq %rsi /* Save the real mode argument */ + movq %rsi, %rdi /* real mode address */ + leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ + leaq input_data(%rip), %rdx /* input_data */ + movl $z_input_len, %ecx /* input_len */ + movq %rbp, %r8 /* output target address */ call decompress_kernel popq %rsi diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c new file mode 100644 index 00000000000..bcbd36c4143 --- /dev/null +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -0,0 +1,97 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Compute the desired load offset from a compressed program; outputs + * a small assembly wrapper with the appropriate symbols defined. + */ + +#include +#include +#include +#include + +static uint32_t getle32(const void *p) +{ + const uint8_t *cp = p; + + return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) + + ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24); +} + +int main(int argc, char *argv[]) +{ + uint32_t olen; + long ilen; + unsigned long offs; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); + return 1; + } + + /* Get the information for the compressed kernel image first */ + + f = fopen(argv[1], "r"); + if (!f) { + perror(argv[1]); + return 1; + } + + + if (fseek(f, -4L, SEEK_END)) { + perror(argv[1]); + } + fread(&olen, sizeof olen, 1, f); + ilen = ftell(f); + olen = getle32(&olen); + fclose(f); + + /* + * Now we have the input (compressed) and output (uncompressed) + * sizes, compute the necessary decompression offset... + */ + + offs = (olen > ilen) ? olen - ilen : 0; + offs += olen >> 12; /* Add 8 bytes for each 32K block */ + offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */ + offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ + + printf(".section \".rodata.compressed\",\"a\",@progbits\n"); + printf(".globl z_input_len\n"); + printf("z_input_len = %lu\n", ilen); + printf(".globl z_output_len\n"); + printf("z_output_len = %lu\n", (unsigned long)olen); + printf(".globl z_extract_offset\n"); + printf("z_extract_offset = 0x%lx\n", offs); + /* z_extract_offset_negative allows simplification of head_32.S */ + printf(".globl z_extract_offset_negative\n"); + printf("z_extract_offset_negative = -0x%lx\n", offs); + + printf(".globl input_data, input_data_end\n"); + printf("input_data:\n"); + printf(".incbin \"%s\"\n", argv[1]); + printf("input_data_end:\n"); + + return 0; +} diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr deleted file mode 100644 index f02382ae5c4..00000000000 --- a/arch/x86/boot/compressed/vmlinux.scr +++ /dev/null @@ -1,10 +0,0 @@ -SECTIONS -{ - .rodata.compressed : { - input_len = .; - LONG(input_data_end - input_data) input_data = .; - *(.data) - output_len = . - 4; - input_data_end = .; - } -} -- cgit v1.2.3-70-g09d2 From 99aa45595f45603526513d5e29fc00f8afbf3913 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 May 2009 16:02:10 -0700 Subject: x86, boot: remove dead code from boot/compressed/head_*.S Remove a couple of lines of dead code from arch/x86/boot/compressed/head_*.S; all of these update registers that are dead in the current code. [ Impact: cleanup ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 10 ---------- arch/x86/boot/compressed/head_64.S | 2 -- 2 files changed, 12 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 470474bafc4..2b8e0dfa4b2 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -99,16 +99,6 @@ ENTRY(startup_32) cld popl %esi -/* - * Compute the kernel start address. 
- */ -#ifdef CONFIG_RELOCATABLE - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp -#else - movl $LOAD_PHYSICAL_ADDR, %ebp -#endif - /* * Jump to the relocated address. */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4135d438b66..2bb500af1bd 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -226,10 +226,8 @@ ENTRY(startup_64) leaq startup_32(%rip) /* - $startup_32 */, %rbp addq $(PMD_PAGE_SIZE - 1), %rbp andq $PMD_PAGE_MASK, %rbp - movq %rbp, %rbx #else movq $LOAD_PHYSICAL_ADDR, %rbp - movq %rbp, %rbx #endif /* Target address to relocate to for decompression */ -- cgit v1.2.3-70-g09d2 From 37ba7ab5e33cebc25c68fffe33e9f21e7c2014e8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 May 2009 15:56:08 -0700 Subject: x86, boot: make kernel_alignment adjustable; new bzImage fields Make the kernel_alignment field adjustable; this allows us to set it to a large value (intended to be 16 MB to avoid ZONE_DMA contention, memory holes and other weirdness) while a smart bootloader can still force a loading at a lesser alignment if absolutely necessary. Also export pref_address (preferred loading address, corresponding to the link-time address) and init_size, the total amount of linear memory the kernel will require during initialization. [ Impact: allows better kernel placement, gives bootloader more info ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 7 +++++-- arch/x86/boot/compressed/head_64.S | 14 ++++++++++---- arch/x86/boot/header.S | 15 +++++++++++++-- arch/x86/include/asm/boot.h | 15 +++++++++++++++ arch/x86/kernel/asm-offsets_32.c | 1 + arch/x86/kernel/asm-offsets_64.c | 1 + 6 files changed, 45 insertions(+), 8 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 2b8e0dfa4b2..75e4f001e70 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -69,8 +69,11 @@ ENTRY(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx #else movl $LOAD_PHYSICAL_ADDR, %ebx #endif diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 2bb500af1bd..f62c284db9e 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -84,8 +84,11 @@ ENTRY(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - addl $(PMD_PAGE_SIZE -1), %ebx - andl $PMD_PAGE_MASK, %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx #else movl $LOAD_PHYSICAL_ADDR, %ebx #endif @@ -224,8 +227,11 @@ ENTRY(startup_64) /* Start with the delta to where the kernel will run at. 
*/ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - addq $(PMD_PAGE_SIZE - 1), %rbp - andq $PMD_PAGE_MASK, %rbp + movl BP_kernel_alignment(%rsi), %eax + decl %eax + addq %rax, %rbp + notq %rax + andq %rax, %rbp #else movq $LOAD_PHYSICAL_ADDR, %rbp #endif diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 27285143ade..a0b426978d5 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -116,7 +116,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x0209 # header version number (>= 0x0105) + .word 0x020a # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -201,7 +201,7 @@ relocatable_kernel: .byte 1 #else relocatable_kernel: .byte 0 #endif -pad2: .byte 0 +min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment pad3: .word 0 cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, @@ -220,6 +220,17 @@ setup_data: .quad 0 # 64-bit physical pointer to # single linked list of # struct setup_data +pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr + +#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_extract_offset) +#define VO_INIT_SIZE (VO__end - VO__text) +#if ZO_INIT_SIZE > VO_INIT_SIZE +#define INIT_SIZE ZO_INIT_SIZE +#else +#define INIT_SIZE VO_INIT_SIZE +#endif +init_size: .long INIT_SIZE # kernel initialization size + # End of setup header ##################################################### .section ".inittext", "ax" diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6ba23dd9fc9..418e632d4a8 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -8,11 +8,26 @@ #ifdef __KERNEL__ +#include + /* Physical address where kernel should be loaded. */ #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +/* Minimum kernel alignment, as a power of two */ +#ifdef CONFIG_x86_64 +#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT +#else +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) +#endif +#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) + +#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ + (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) +#error "Invalid value for CONFIG_PHYSICAL_ALIGN" +#endif + #ifdef CONFIG_KERNEL_BZIP2 #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 5a6aa1c1162..1a830cbd701 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -146,4 +146,5 @@ void foo(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72f062fb4b..898ecc47e12 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -125,6 +125,7 @@ int main(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); BLANK(); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); -- cgit v1.2.3-70-g09d2
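
For reference, the slack calculation that this series moves out of head_*.S and into mkpiggy.c can be written as one small C helper. This is only a sketch mirroring the arithmetic shown in the patches above (8 bytes for every 32K of output, 32K + 18 bytes of fixed slack, rounded up to a 4K boundary); the function and variable names here are illustrative, not symbols from the kernel tree.

#include <stdio.h>

/*
 * Mirror of the z_extract_offset arithmetic in mkpiggy.c: how far past the
 * kernel's runtime address the compressed image must be copied so that
 * in-place decompression cannot overwrite not-yet-consumed input.
 */
static unsigned long extract_offset(unsigned long ilen, unsigned long olen)
{
	unsigned long offs = (olen > ilen) ? olen - ilen : 0;

	offs += olen >> 12;		/* 8 bytes for every 32K of output */
	offs += 32 * 1024 + 18;		/* fixed extra slack               */
	return (offs + 4095) & ~4095UL;	/* round to a 4K boundary          */
}

int main(void)
{
	/* e.g. a 4 MB compressed payload that decompresses to 16 MB */
	printf("0x%lx\n", extract_offset(4UL << 20, 16UL << 20));
	return 0;
}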
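The decl/addl/notl/andl sequence that the kernel_alignment patch adds to head_32.S (and its 64-bit counterpart) is just a round-up to a power-of-two alignment read from boot_params. A minimal C sketch of the same computation, assuming align holds the value loaded from BP_kernel_alignment (align_up is an illustrative name, not a kernel symbol):

#include <stdint.h>

/*
 * Round "addr" up to the next multiple of "align" (a power of two),
 * exactly as the assembly does: mask = align - 1, then
 * addr = (addr + mask) & ~mask.
 */
static uint32_t align_up(uint32_t addr, uint32_t align)
{
	uint32_t mask = align - 1;	/* decl %eax            */

	addr += mask;			/* addl %eax, %ebx      */
	return addr & ~mask;		/* notl %eax; andl %eax */
}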
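The final patch is aimed at bootloaders: pref_address, init_size, kernel_alignment and min_alignment together tell a loader where the kernel would like to live and how much linear memory it needs during initialization. The following is a hedged sketch of how a loader might use them, not code from any real bootloader; the struct, choose_load_addr and region_free are hypothetical names, and only the fields introduced or referenced by the patch are modeled.

#include <stdint.h>

/* Subset of the setup header; field names follow the boot protocol. */
struct setup_header_fields {
	uint64_t pref_address;		/* preferred (link-time) load address */
	uint32_t init_size;		/* linear memory needed during init   */
	uint32_t kernel_alignment;	/* preferred alignment                */
	uint8_t  min_alignment;		/* log2 of the minimum alignment      */
	uint8_t  relocatable_kernel;
};

/*
 * Use pref_address if [pref_address, pref_address + init_size) is free;
 * otherwise, for a relocatable kernel, scan for a free region aligned to
 * at least (1 << min_alignment), preferring kernel_alignment.
 */
static uint64_t choose_load_addr(const struct setup_header_fields *hdr,
				 int (*region_free)(uint64_t start, uint64_t len))
{
	if (region_free(hdr->pref_address, hdr->init_size))
		return hdr->pref_address;

	if (hdr->relocatable_kernel) {
		uint64_t align = hdr->kernel_alignment;
		uint64_t addr;

		if (align < (1ULL << hdr->min_alignment))
			align = 1ULL << hdr->min_alignment;

		/* naive low-to-high scan of 32-bit physical space */
		for (addr = align; addr < (1ULL << 32); addr += align)
			if (region_free(addr, hdr->init_size))
				return addr;
	}
	return hdr->pref_address;	/* no better choice found */
}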