summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2008-08-30 11:41:12 +1000
committerPaul Mackerras <paulus@samba.org>2008-09-15 11:08:35 -0700
commite31aa453bbc4886a7bd33e5c2afa526d6f55bd7a (patch)
treefefa13c13d7b1803fdaeb92143f83b1971f0ec8d
parent1f6a93e4c35e75d547b51f56ba8139ab1a91628c (diff)
powerpc: Use LOAD_REG_IMMEDIATE only for constants on 64-bit
Using LOAD_REG_IMMEDIATE to get the address of kernel symbols generates 5 instructions where LOAD_REG_ADDR can do it in one, and will generate R_PPC64_ADDR16_* relocations in the output when we get to making the kernel as a position-independent executable, which we'd rather not have to handle. This changes various bits of assembly code to use LOAD_REG_ADDR when we need to get the address of a symbol, or to use suitable position-independent code for cases where we can't access the TOC for various reasons, or if we're not running at the address we were linked at. It also cleans up a few minor things; there's no reason to save and restore SRR0/1 around RTAS calls, __mmu_off can get the return address from LR more conveniently than the caller can supply it in R4 (and we already assume elsewhere that EA == RA if the MMU is on in early boot), and enable_64b_mode was using 5 instructions where 2 would do. Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h2
-rw-r--r--arch/powerpc/kernel/cpu_setup_ppc970.S4
-rw-r--r--arch/powerpc/kernel/entry_64.S16
-rw-r--r--arch/powerpc/kernel/head_64.S181
-rw-r--r--arch/powerpc/kernel/misc.S10
-rw-r--r--arch/powerpc/platforms/iseries/exception.S23
6 files changed, 110 insertions, 126 deletions
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 0966899d974..c4a029ccb4d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -268,7 +268,7 @@ n:
* Loads the value of the constant expression 'expr' into register 'rn'
* using immediate instructions only. Use this when it's important not
* to reference other data (i.e. on ppc64 when the TOC pointer is not
- * valid).
+ * valid) and when 'expr' is a constant or absolute address.
*
* LOAD_REG_ADDR(rn, name)
* Loads the address of label 'name' into register 'rn'. Use this when
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
index bf118c38575..27f2507279d 100644
--- a/arch/powerpc/kernel/cpu_setup_ppc970.S
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -110,7 +110,7 @@ load_hids:
isync
/* Save away cpu state */
- LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
+ LOAD_REG_ADDR(r5,cpu_state_storage)
/* Save HID0,1,4 and 5 */
mfspr r3,SPRN_HID0
@@ -134,7 +134,7 @@ _GLOBAL(__restore_cpu_ppc970)
rldicl. r0,r0,4,63
beqlr
- LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
+ LOAD_REG_ADDR(r5,cpu_state_storage)
/* Before accessing memory, we make sure rm_ci is clear */
li r0,0
mfspr r3,SPRN_HID4
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 55445f1dba8..fd8b4bae9b0 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -690,10 +690,6 @@ _GLOBAL(enter_rtas)
std r7,_DAR(r1)
mfdsisr r8
std r8,_DSISR(r1)
- mfsrr0 r9
- std r9,_SRR0(r1)
- mfsrr1 r10
- std r10,_SRR1(r1)
/* Temporary workaround to clear CR until RTAS can be modified to
* ignore all bits.
@@ -754,6 +750,10 @@ _STATIC(rtas_return_loc)
mfspr r4,SPRN_SPRG3 /* Get PACA */
clrldi r4,r4,2 /* convert to realmode address */
+ bcl 20,31,$+4
+0: mflr r3
+ ld r3,(1f-0b)(r3) /* get &.rtas_restore_regs */
+
mfmsr r6
li r0,MSR_RI
andc r6,r6,r0
@@ -761,7 +761,6 @@ _STATIC(rtas_return_loc)
mtmsrd r6
ld r1,PACAR1(r4) /* Restore our SP */
- LOAD_REG_IMMEDIATE(r3,.rtas_restore_regs)
ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
mtspr SPRN_SRR0,r3
@@ -769,6 +768,9 @@ _STATIC(rtas_return_loc)
rfid
b . /* prevent speculative execution */
+ .align 3
+1: .llong .rtas_restore_regs
+
_STATIC(rtas_restore_regs)
/* relocation is on at this point */
REST_GPR(2, r1) /* Restore the TOC */
@@ -788,10 +790,6 @@ _STATIC(rtas_restore_regs)
mtdar r7
ld r8,_DSISR(r1)
mtdsisr r8
- ld r9,_SRR0(r1)
- mtsrr0 r9
- ld r10,_SRR1(r1)
- mtsrr1 r10
addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
ld r0,16(r1) /* get return address */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 97bb6e6f67b..6cdfd44d8ef 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -128,11 +128,11 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,__secondary_hold_acknowledge@l(0)
+ std r24,__secondary_hold_acknowledge-_stext(0)
sync
/* All secondary cpus wait here until told to start. */
-100: ld r4,__secondary_hold_spinloop@l(0)
+100: ld r4,__secondary_hold_spinloop-_stext(0)
cmpdi 0,r4,0
beq 100b
@@ -1223,11 +1223,14 @@ _GLOBAL(generic_secondary_smp_init)
/* turn on 64-bit mode */
bl .enable_64b_mode
+ /* get the TOC pointer (real address) */
+ bl .relative_toc
+
/* Set up a paca value for this processor. Since we have the
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
- LOAD_REG_IMMEDIATE(r13, paca) /* Get base vaddr of paca array */
+ LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */
li r5,0 /* logical cpu id */
1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
@@ -1256,7 +1259,7 @@ _GLOBAL(generic_secondary_smp_init)
sync /* order paca.run and cur_cpu_spec */
/* See if we need to call a cpu state restore handler */
- LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
+ LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
ld r23,CPU_SPEC_RESTORE(r23)
cmpdi 0,r23,0
@@ -1272,10 +1275,15 @@ _GLOBAL(generic_secondary_smp_init)
b __secondary_start
#endif
+/*
+ * Turn the MMU off.
+ * Assumes we're mapped EA == RA if the MMU is on.
+ */
_STATIC(__mmu_off)
mfmsr r3
andi. r0,r3,MSR_IR|MSR_DR
beqlr
+ mflr r4
andc r3,r3,r0
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
@@ -1296,6 +1304,18 @@ _STATIC(__mmu_off)
*
*/
_GLOBAL(__start_initialization_multiplatform)
+ /* Make sure we are running in 64 bits mode */
+ bl .enable_64b_mode
+
+ /* Get TOC pointer (current runtime address) */
+ bl .relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
/*
* Are we booted from a PROM Of-type client-interface ?
*/
@@ -1307,9 +1327,6 @@ _GLOBAL(__start_initialization_multiplatform)
mr r31,r3
mr r30,r4
- /* Make sure we are running in 64 bits mode */
- bl .enable_64b_mode
-
/* Setup some critical 970 SPRs before switching MMU off */
mfspr r0,SPRN_PVR
srwi r0,r0,16
@@ -1324,9 +1341,7 @@ _GLOBAL(__start_initialization_multiplatform)
1: bl .__cpu_preinit_ppc970
2:
- /* Switch off MMU if not already */
- LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
- add r4,r4,r30
+ /* Switch off MMU if not already off */
bl .__mmu_off
b .__after_prom_start
@@ -1341,23 +1356,10 @@ _INIT_STATIC(__boot_from_prom)
/*
* Align the stack to 16-byte boundary
* Depending on the size and layout of the ELF sections in the initial
- * boot binary, the stack pointer will be unalignet on PowerMac
+ * boot binary, the stack pointer may be unaligned on PowerMac
*/
rldicr r1,r1,0,59
- /* Make sure we are running in 64 bits mode */
- bl .enable_64b_mode
-
- /* put a relocation offset into r3 */
- bl .reloc_offset
-
- LOAD_REG_IMMEDIATE(r2,__toc_start)
- addi r2,r2,0x4000
- addi r2,r2,0x4000
-
- /* Relocate the TOC from a virt addr to a real addr */
- add r2,r2,r3
-
/* Restore parameters */
mr r3,r31
mr r4,r30
@@ -1373,53 +1375,37 @@ _INIT_STATIC(__boot_from_prom)
_STATIC(__after_prom_start)
/*
- * We need to run with __start at physical address PHYSICAL_START.
+ * We need to run with _stext at physical address PHYSICAL_START.
* This will leave some code in the first 256B of
* real memory, which are reserved for software use.
- * The remainder of the first page is loaded with the fixed
- * interrupt vectors. The next two pages are filled with
- * unknown exception placeholders.
*
* Note: This process overwrites the OF exception vectors.
- * r26 == relocation offset
- * r27 == KERNELBASE
*/
- bl .reloc_offset
- mr r26,r3
- LOAD_REG_IMMEDIATE(r27, KERNELBASE)
-
LOAD_REG_IMMEDIATE(r3, PHYSICAL_START) /* target addr */
-
- // XXX FIXME: Use phys returned by OF (r30)
- add r4,r27,r26 /* source addr */
- /* current address of _start */
- /* i.e. where we are running */
- /* the source addr */
-
- cmpdi r4,0 /* In some cases the loader may */
- bne 1f
- b .start_here_multiplatform /* have already put us at zero */
- /* so we can skip the copy. */
-1: LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */
- sub r5,r5,r27
-
+ cmpd r3,r26 /* In some cases the loader may */
+ beq 9f /* have already put us at zero */
+ mr r4,r26 /* source address */
+ lis r5,(copy_to_here - _stext)@ha
+ addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
bl .copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
-
- LOAD_REG_IMMEDIATE(r0, 4f) /* Jump to the copy of this code */
- mtctr r0 /* that we just made/relocated */
+ addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
+ addi r8,r8,(4f - _stext)@l /* that we just made */
+ mtctr r8
bctr
-4: LOAD_REG_IMMEDIATE(r5,klimit)
- add r5,r5,r26
- ld r5,0(r5) /* get the value of klimit */
- sub r5,r5,r27
+4: /* Now copy the rest of the kernel up to _end */
+ addis r5,r26,(p_end - _stext)@ha
+ ld r5,(p_end - _stext)@l(r5) /* get _end */
bl .copy_and_flush /* copy the rest */
- b .start_here_multiplatform
+
+9: b .start_here_multiplatform
+
+p_end: .llong _end - _stext
/*
* Copy routine used to copy the kernel to start at physical address 0
@@ -1484,6 +1470,9 @@ _GLOBAL(pmac_secondary_start)
/* turn on 64-bit mode */
bl .enable_64b_mode
+ /* get TOC pointer (real address) */
+ bl .relative_toc
+
/* Copy some CPU settings from CPU 0 */
bl .__restore_cpu_ppc970
@@ -1493,10 +1482,10 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
- LOAD_REG_IMMEDIATE(r4, paca) /* Get base vaddr of paca array */
- mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
+ LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */
+ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
add r13,r13,r4 /* for this processor. */
- mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
+ mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
/* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
@@ -1524,9 +1513,6 @@ __secondary_start:
/* Set thread priority to MEDIUM */
HMT_MEDIUM
- /* Load TOC */
- ld r2,PACATOC(r13)
-
/* Do early setup for that CPU (stab, slb, hash table pointer) */
bl .early_setup_secondary
@@ -1563,9 +1549,11 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
/*
* Running with relocation on at this point. All we want to do is
- * zero the stack back-chain pointer before going into C code.
+ * zero the stack back-chain pointer and get the TOC virtual address
+ * before going into C code.
*/
_GLOBAL(start_secondary_prolog)
+ ld r2,PACATOC(r13)
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
bl .start_secondary
@@ -1577,34 +1565,46 @@ _GLOBAL(start_secondary_prolog)
*/
_GLOBAL(enable_64b_mode)
mfmsr r11 /* grab the current MSR */
- li r12,1
- rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
- or r11,r11,r12
- li r12,1
- rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
+ li r12,(MSR_SF | MSR_ISF)@highest
+ sldi r12,r12,48
or r11,r11,r12
mtmsrd r11
isync
blr
/*
+ * This puts the TOC pointer into r2, offset by 0x8000 (as expected
+ * by the toolchain). It computes the correct value for wherever we
+ * are running at the moment, using position-independent code.
+ */
+_GLOBAL(relative_toc)
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r9
+ ld r2,(p_toc - 0b)(r9)
+ add r2,r2,r9
+ mtlr r0
+ blr
+
+p_toc: .llong __toc_start + 0x8000 - 0b
+
+/*
* This is where the main kernel code starts.
*/
_INIT_STATIC(start_here_multiplatform)
- /* get a new offset, now that the kernel has moved. */
- bl .reloc_offset
- mr r26,r3
+ /* set up the TOC (real address) */
+ bl .relative_toc
/* Clear out the BSS. It may have been done in prom_init,
* already but that's irrelevant since prom_init will soon
* be detached from the kernel completely. Besides, we need
* to clear it now for kexec-style entry.
*/
- LOAD_REG_IMMEDIATE(r11,__bss_stop)
- LOAD_REG_IMMEDIATE(r8,__bss_start)
+ LOAD_REG_ADDR(r11,__bss_stop)
+ LOAD_REG_ADDR(r8,__bss_start)
sub r11,r11,r8 /* bss size */
addi r11,r11,7 /* round up to an even double word */
- rldicl. r11,r11,61,3 /* shift right by 3 */
+ srdi. r11,r11,3 /* shift right by 3 */
beq 4f
addi r8,r8,-8
li r0,0
@@ -1617,35 +1617,28 @@ _INIT_STATIC(start_here_multiplatform)
ori r6,r6,MSR_RI
mtmsrd r6 /* RI on */
- /* The following gets the stack and TOC set up with the regs */
+ /* The following gets the stack set up with the regs */
/* pointing to the real addr of the kernel stack. This is */
/* all done to support the C function call below which sets */
/* up the htab. This is done because we have relocated the */
/* kernel but are still running in real mode. */
- LOAD_REG_IMMEDIATE(r3,init_thread_union)
- add r3,r3,r26
+ LOAD_REG_ADDR(r3,init_thread_union)
- /* set up a stack pointer (physical address) */
+ /* set up a stack pointer */
addi r1,r3,THREAD_SIZE
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
- /* set up the TOC (physical address) */
- LOAD_REG_IMMEDIATE(r2,__toc_start)
- addi r2,r2,0x4000
- addi r2,r2,0x4000
- add r2,r2,r26
-
/* Do very early kernel initializations, including initial hash table,
* stab and slb setup before we turn on relocation. */
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl .early_setup
+ bl .early_setup /* also sets r13 and SPRG3 */
- LOAD_REG_IMMEDIATE(r3, .start_here_common)
- LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
+ LOAD_REG_ADDR(r3, .start_here_common)
+ ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
rfid
@@ -1654,20 +1647,10 @@ _INIT_STATIC(start_here_multiplatform)
/* This is where all platforms converge execution */
_INIT_GLOBAL(start_here_common)
/* relocation is on at this point */
+ std r1,PACAKSAVE(r13)
- /* The following code sets up the SP and TOC now that we are */
- /* running with translation enabled. */
-
- LOAD_REG_IMMEDIATE(r3,init_thread_union)
-
- /* set up the stack */
- addi r1,r3,THREAD_SIZE
- li r0,0
- stdu r0,-STACK_FRAME_OVERHEAD(r1)
-
- /* Load the TOC */
+ /* Load the TOC (virtual address) */
ld r2,PACATOC(r13)
- std r1,PACAKSAVE(r13)
bl .setup_system
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 85cb6f34084..2d29752cbe1 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -31,11 +31,14 @@ _GLOBAL(reloc_offset)
mflr r0
bl 1f
1: mflr r3
- LOAD_REG_IMMEDIATE(r4,1b)
+ PPC_LL r4,(2f-1b)(r3)
subf r3,r4,r3
mtlr r0
blr
+ .align 3
+2: PPC_LONG 1b
+
/*
* add_reloc_offset(x) returns x + reloc_offset().
*/
@@ -43,12 +46,15 @@ _GLOBAL(add_reloc_offset)
mflr r0
bl 1f
1: mflr r5
- LOAD_REG_IMMEDIATE(r4,1b)
+ PPC_LL r4,(2f-1b)(r5)
subf r5,r4,r5
add r3,r3,r5
mtlr r0
blr
+ .align 3
+2: PPC_LONG 1b
+
_GLOBAL(kernel_execve)
li r0,__NR_execve
sc
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index 8ff330d026c..2f581521eb9 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -38,12 +38,13 @@
.globl system_reset_iSeries
system_reset_iSeries:
+ bl .relative_toc
mfspr r13,SPRN_SPRG3 /* Get alpaca address */
- LOAD_REG_IMMEDIATE(r23, alpaca)
+ LOAD_REG_ADDR(r23, alpaca)
li r0,ALPACA_SIZE
sub r23,r13,r23
divdu r23,r23,r0 /* r23 has cpu number */
- LOAD_REG_IMMEDIATE(r13, paca)
+ LOAD_REG_ADDR(r13, paca)
mulli r0,r23,PACA_SIZE
add r13,r13,r0
mtspr SPRN_SPRG3,r13 /* Save it away for the future */
@@ -60,14 +61,14 @@ system_reset_iSeries:
mtspr SPRN_CTRLT,r4
/* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
-/* In the UP case we'll yeild() later, and we will not access the paca anyway */
+/* In the UP case we'll yield() later, and we will not access the paca anyway */
#ifdef CONFIG_SMP
1:
HMT_LOW
- LOAD_REG_IMMEDIATE(r23, __secondary_hold_spinloop)
+ LOAD_REG_ADDR(r23, __secondary_hold_spinloop)
ld r23,0(r23)
sync
- LOAD_REG_IMMEDIATE(r3,current_set)
+ LOAD_REG_ADDR(r3,current_set)
sldi r28,r24,3 /* get current_set[cpu#] */
ldx r3,r3,r28
addi r1,r3,THREAD_SIZE
@@ -90,7 +91,7 @@ system_reset_iSeries:
lbz r23,PACAPROCSTART(r13) /* Test if this processor
* should start */
sync
- LOAD_REG_IMMEDIATE(r3,current_set)
+ LOAD_REG_ADDR(r3,current_set)
sldi r28,r24,3 /* get current_set[cpu#] */
ldx r3,r3,r28
addi r1,r3,THREAD_SIZE
@@ -255,8 +256,8 @@ hardware_interrupt_iSeries_masked:
_INIT_STATIC(__start_initialization_iSeries)
/* Clear out the BSS */
- LOAD_REG_IMMEDIATE(r11,__bss_stop)
- LOAD_REG_IMMEDIATE(r8,__bss_start)
+ LOAD_REG_ADDR(r11,__bss_stop)
+ LOAD_REG_ADDR(r8,__bss_start)
sub r11,r11,r8 /* bss size */
addi r11,r11,7 /* round up to an even double word */
rldicl. r11,r11,61,3 /* shift right by 3 */
@@ -267,15 +268,11 @@ _INIT_STATIC(__start_initialization_iSeries)
3: stdu r0,8(r8)
bdnz 3b
4:
- LOAD_REG_IMMEDIATE(r1,init_thread_union)
+ LOAD_REG_ADDR(r1,init_thread_union)
addi r1,r1,THREAD_SIZE
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
- LOAD_REG_IMMEDIATE(r2,__toc_start)
- addi r2,r2,0x4000
- addi r2,r2,0x4000
-
bl .iSeries_early_setup
bl .early_setup