Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--   arch/powerpc/lib/Makefile                 |   7
-rw-r--r--   arch/powerpc/lib/checksum_64.S            | 482
-rw-r--r--   arch/powerpc/lib/checksum_wrappers_64.c   | 102
-rw-r--r--   arch/powerpc/lib/copy_32.S                |   2
-rw-r--r--   arch/powerpc/lib/ldstfp.S                 |  36
-rw-r--r--   arch/powerpc/lib/locks.c                  |   4
-rw-r--r--   arch/powerpc/lib/sstep.c                  |   8
7 files changed, 491 insertions, 150 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5bb89c82807..889f2bc106d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -4,9 +4,7 @@
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS	+= -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
 
 CFLAGS_REMOVE_code-patching.o = -pg
 CFLAGS_REMOVE_feature-fixups.o = -pg
@@ -17,7 +15,8 @@ obj-$(CONFIG_PPC32)	+= div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o mem_64.o string.o
+			   memcpy_64.o usercopy_64.o mem_64.o string.o \
+			   checksum_wrappers_64.o
 
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index ef96c6c58ef..18245af38ae 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -65,165 +65,393 @@ _GLOBAL(csum_tcpudp_magic)
 	srwi	r3,r3,16
 	blr
 
+#define STACKFRAMESIZE 256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
 /*
  * Computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit).
  *
- * This code assumes at least halfword alignment, though the length
- * can be any number of bytes.  The sum is accumulated in r5.
- *
  * csum_partial(r3=buff, r4=len, r5=sum)
  */
 _GLOBAL(csum_partial)
-	subi	r3,r3,8		/* we'll offset by 8 for the loads */
-	srdi.	r6,r4,3		/* divide by 8 for doubleword count */
-	addic	r5,r5,0		/* clear carry */
-	beq	3f		/* if we're doing < 8 bytes */
-	andi.	r0,r3,2		/* aligned on a word boundary already? */
-	beq+	1f
-	lhz	r6,8(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
-	subi	r4,r4,2
-	addc	r5,r5,r6
-	srdi.	r6,r4,3		/* recompute number of doublewords */
-	beq	3f		/* any left? */
-1:	mtctr	r6
-2:	ldu	r6,8(r3)	/* main sum loop */
-	adde	r5,r5,r6
-	bdnz	2b
-	andi.	r4,r4,7		/* compute bytes left to sum after doublewords */
-3:	cmpwi	0,r4,4		/* is at least a full word left? */
-	blt	4f
-	lwz	r6,8(r3)	/* sum this word */
+	addic	r0,r5,0			/* clear carry */
+
+	srdi.	r6,r4,3			/* less than 8 bytes? */
+	beq	.Lcsum_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	beq	.Lcsum_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+	lhz	r6,0(r3)		/* align to doubleword */
+	subi	r4,r4,2
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	bdnz	1b
+
+.Lcsum_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r4,7
+	beq	.Lcsum_tail_doublewords		/* len < 128 */
+
+	srdi	r6,r4,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
+	 * the XER dependency. This means the fastest this loop can go is
+	 * 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+
+	adde	r0,r0,r11
+
+	adde	r0,r0,r12
+
+	adde	r0,r0,r14
+
+	adde	r0,r0,r15
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+	bdnz	2b
+
+
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+	adde	r0,r0,r11
+	adde	r0,r0,r12
+	adde	r0,r0,r14
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r4,r4,63
+
+.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r4,3
+	beq	.Lcsum_tail_word
+
+	mtctr	r6
+3:
+	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+	bdnz	3b
+
+	andi.	r4,r4,7
+
+.Lcsum_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r4,2
+	beq	.Lcsum_tail_halfword
+
+	lwz	r6,0(r3)
 	addi	r3,r3,4
+	adde	r0,r0,r6
 	subi	r4,r4,4
-	adde	r5,r5,r6
-4:	cmpwi	0,r4,2		/* is at least a halfword left? */
-	blt+	5f
-	lhz	r6,8(r3)	/* sum this halfword */
-	addi	r3,r3,2
-	subi	r4,r4,2
-	adde	r5,r5,r6
-5:	cmpwi	0,r4,1		/* is at least a byte left? */
-	bne+	6f
-	lbz	r6,8(r3)	/* sum this byte */
-	slwi	r6,r6,8		/* this byte is assumed to be the upper byte of a halfword */
-	adde	r5,r5,r6
-6:	addze	r5,r5		/* add in final carry */
-	rldicl	r4,r5,32,0	/* fold two 32-bit halves together */
-	add	r3,r4,r5
-	srdi	r3,r3,32
-	blr
+
+.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r4,1
+	beq	.Lcsum_tail_byte
+
+	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	subi	r4,r4,2
+
+.Lcsum_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r4,1
+	beq	.Lcsum_finish
+
+	lbz	r6,0(r3)
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+
+.Lcsum_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+
+	.macro source
+100:
+	.section __ex_table,"a"
+	.align 3
+	.llong 100b,.Lsrc_error
+	.previous
+	.endm
+
+	.macro dest
+200:
+	.section __ex_table,"a"
+	.align 3
+	.llong 200b,.Ldest_error
+	.previous
+	.endm
 
 /*
  * Computes the checksum of a memory block at src, length len,
  * and adds in "sum" (32-bit), while copying the block to dst.
  * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * This code needs to be reworked to take advantage of 64 bit sum+copy.
- * However, due to tokenring halfword alignment problems this will be very
- * tricky.  For now we'll leave it until we instrument it somehow.
+ * to *src_err or *dst_err respectively. The caller must take any action
+ * required in this case (zeroing memory, recalculating partial checksum etc).
  *
  * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
  */
 _GLOBAL(csum_partial_copy_generic)
-	addic	r0,r6,0
-	subi	r3,r3,4
-	subi	r4,r4,4
-	srwi.	r6,r5,2
-	beq	3f		/* if we're doing < 4 bytes */
-	andi.	r9,r4,2		/* Align dst to longword boundary */
-	beq+	1f
-81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
+	addic	r0,r6,0			/* clear carry */
+
+	srdi.	r6,r5,3			/* less than 8 bytes? */
+	beq	.Lcopy_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 *
+	 * If the source and destination are relatively unaligned we only
+	 * align the source. This keeps things simple.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	beq	.Lcopy_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+source;	lhz	r6,0(r3)		/* align to doubleword */
 	subi	r5,r5,2
-91:	sth	r6,4(r4)
-	addi	r4,r4,2
-	addc	r0,r0,r6
-	srwi.	r6,r5,2		/* # words to do */
-	beq	3f
-1:	mtctr	r6
-82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
-92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
-	adde	r0,r0,r6
-	bdnz	82b
-	andi.	r5,r5,3
-3:	cmpwi	0,r5,2
-	blt+	4f
-83:	lhz	r6,4(r3)
 	addi	r3,r3,2
-	subi	r5,r5,2
-93:	sth	r6,4(r4)
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
 	addi	r4,r4,2
+	bdnz	1b
+
+.Lcopy_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r5,7
+	beq	.Lcopy_tail_doublewords		/* len < 128 */
+
+	srdi	r6,r5,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
+	 * the XER dependency. This means the fastest this loop can go is
+	 * 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
 	adde	r0,r0,r6
-4:	cmpwi	0,r5,1
-	bne+	5f
-84:	lbz	r6,4(r3)
-94:	stb	r6,4(r4)
-	slwi	r6,r6,8		/* Upper byte of word */
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+	bdnz	2b
+
 	adde	r0,r0,r6
-5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
-	rldicl	r4,r3,32,0	/* fold 64 bit value */
-	add	r3,r4,r3
-	srdi	r3,r3,32
-	blr
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
-/* These shouldn't go in the fixup section, since that would
-   cause the ex_table addresses to get out of order. */
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r5,r5,63
+
+.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r5,3
+	beq	.Lcopy_tail_word
 
-	.globl src_error_1
-src_error_1:
-	li	r6,0
-	subi	r5,r5,2
-95:	sth	r6,4(r4)
-	addi	r4,r4,2
-	srwi.	r6,r5,2
-	beq	3f
 	mtctr	r6
-	.globl src_error_2
-src_error_2:
-	li	r6,0
-96:	stwu	r6,4(r4)
-	bdnz	96b
-3:	andi.	r5,r5,3
-	beq	src_error
-	.globl src_error_3
-src_error_3:
-	li	r6,0
-	mtctr	r5
-	addi	r4,r4,3
-97:	stbu	r6,1(r4)
-	bdnz	97b
-	.globl src_error
-src_error:
+3:
+source;	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+dest;	std	r6,0(r4)
+	addi	r4,r4,8
+	bdnz	3b
+
+	andi.	r5,r5,7
+
+.Lcopy_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r5,2
+	beq	.Lcopy_tail_halfword
+
+source;	lwz	r6,0(r3)
+	addi	r3,r3,4
+	adde	r0,r0,r6
+dest;	stw	r6,0(r4)
+	addi	r4,r4,4
+	subi	r5,r5,4
+
+.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r5,1
+	beq	.Lcopy_tail_byte
+
+source;	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
+	addi	r4,r4,2
+	subi	r5,r5,2
+
+.Lcopy_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r5,1
+	beq	.Lcopy_finish
+
+source;	lbz	r6,0(r3)
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+dest;	stb	r6,0(r4)
+
+.Lcopy_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+.Lsrc_error:
 	cmpdi	0,r7,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r7)
-1:	addze	r3,r0
 	blr
 
-	.globl dst_error
-dst_error:
+.Ldest_error:
 	cmpdi	0,r8,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r8)
-1:	addze	r3,r0
 	blr
-
-.section __ex_table,"a"
-	.align	3
-	.llong	81b,src_error_1
-	.llong	91b,dst_error
-	.llong	82b,src_error_2
-	.llong	92b,dst_error
-	.llong	83b,src_error_3
-	.llong	93b,dst_error
-	.llong	84b,src_error_3
-	.llong	94b,dst_error
-	.llong	95b,dst_error
-	.llong	96b,dst_error
-	.llong	97b,dst_error
diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c
new file mode 100644
index 00000000000..769b817fbb3
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers_64.c
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+			       int len, __wsum sum, int *err_ptr)
+{
+	unsigned int csum;
+
+	might_sleep();
+
+	*err_ptr = 0;
+
+	if (!len) {
+		csum = 0;
+		goto out;
+	}
+
+	if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
+		*err_ptr = -EFAULT;
+		csum = (__force unsigned int)sum;
+		goto out;
+	}
+
+	csum = csum_partial_copy_generic((void __force *)src, dst,
+					 len, sum, err_ptr, NULL);
+
+	if (unlikely(*err_ptr)) {
+		int missing = __copy_from_user(dst, src, len);
+
+		if (missing) {
+			memset(dst + len - missing, 0, missing);
+			*err_ptr = -EFAULT;
+		} else {
+			*err_ptr = 0;
+		}
+
+		csum = csum_partial(dst, len, sum);
+	}
+
+out:
+	return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_from_user);
+
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
+			     __wsum sum, int *err_ptr)
+{
+	unsigned int csum;
+
+	might_sleep();
+
+	*err_ptr = 0;
+
+	if (!len) {
+		csum = 0;
+		goto out;
+	}
+
+	if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) {
+		*err_ptr = -EFAULT;
+		csum = -1; /* invalid checksum */
+		goto out;
+	}
+
+	csum = csum_partial_copy_generic(src, (void __force *)dst,
+					 len, sum, NULL, err_ptr);
+
+	if (unlikely(*err_ptr)) {
+		csum = csum_partial(src, len, sum);
+
+		if (copy_to_user(dst, src, len)) {
+			*err_ptr = -EFAULT;
+			csum = -1; /* invalid checksum */
+		}
+	}
+
+out:
+	return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 74a7f4130b4..55f19f9fd70 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -62,7 +62,7 @@
 	.text
 	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
-	.stabs	"copy32.S",N_SO,0,0,0f
+	.stabs	"copy_32.S",N_SO,0,0,0f
 0:
 
 CACHELINE_BYTES = L1_CACHE_BYTES
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index f6448636baf..6a85380520b 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -17,6 +17,8 @@
 #include <asm/asm-offsets.h>
 #include <linux/errno.h>
 
+#ifdef CONFIG_PPC_FPU
+
 #define STKFRM	(PPC_MIN_STKFRM + 16)
 
 .macro	extab	instr,handler
@@ -81,7 +83,7 @@ _GLOBAL(do_lfs)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -93,7 +95,7 @@ _GLOBAL(do_lfs)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -108,7 +110,7 @@ _GLOBAL(do_lfd)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -120,7 +122,7 @@ _GLOBAL(do_lfd)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -135,7 +137,7 @@ _GLOBAL(do_stfs)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -147,7 +149,7 @@ _GLOBAL(do_stfs)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -162,7 +164,7 @@ _GLOBAL(do_stfd)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -174,7 +176,7 @@ _GLOBAL(do_stfd)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -229,7 +231,7 @@ _GLOBAL(do_lvx)
 	oris	r7,r6,MSR_VEC@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stvx	vr0,r1,r8
@@ -241,7 +243,7 @@ _GLOBAL(do_lvx)
 	lvx	vr0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -257,7 +259,7 @@ _GLOBAL(do_stvx)
 	oris	r7,r6,MSR_VEC@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stvx	vr0,r1,r8
@@ -269,7 +271,7 @@ _GLOBAL(do_stvx)
 	lvx	vr0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -325,7 +327,7 @@ _GLOBAL(do_lxvd2x)
 	oris	r7,r6,MSR_VSX@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	STXVD2X(0,r1,r8)
@@ -337,7 +339,7 @@ _GLOBAL(do_lxvd2x)
 	LXVD2X(0,r1,r8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -353,7 +355,7 @@ _GLOBAL(do_stxvd2x)
 	oris	r7,r6,MSR_VSX@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	STXVD2X(0,r1,r8)
@@ -365,7 +367,7 @@ _GLOBAL(do_stxvd2x)
 	LXVD2X(0,r1,r8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -373,3 +375,5 @@ _GLOBAL(do_stxvd2x)
 	extab	2b,3b
 
 #endif /* CONFIG_VSX */
+
+#endif /* CONFIG_PPC_FPU */
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 58e14fba11b..9b8182e8216 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -34,7 +34,7 @@ void __spin_yield(arch_spinlock_t *lock)
 		return;
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = lppaca[holder_cpu].yield_count;
+	yield_count = lppaca_of(holder_cpu).yield_count;
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
@@ -65,7 +65,7 @@ void __rw_yield(arch_rwlock_t *rw)
 		return;		/* no write lock at present */
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = lppaca[holder_cpu].yield_count;
+	yield_count = lppaca_of(holder_cpu).yield_count;
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e0a9858d537..ae5189ab004 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -30,6 +30,7 @@ extern char system_call_common[];
 #define XER_OV		0x40000000U
 #define XER_CA		0x20000000U
 
+#ifdef CONFIG_PPC_FPU
 /*
  * Functions in ldstfp.S
  */
@@ -41,6 +42,7 @@ extern int do_lvx(int rn, unsigned long ea);
 extern int do_stvx(int rn, unsigned long ea);
 extern int do_lxvd2x(int rn, unsigned long ea);
 extern int do_stxvd2x(int rn, unsigned long ea);
+#endif
 
 /*
  * Determine whether a conditional branch instruction would branch.
 */
@@ -290,6 +292,7 @@ static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb,
 	return write_mem_unaligned(val, ea, nb, regs);
 }
 
+#ifdef CONFIG_PPC_FPU
 /*
  * Check the address and alignment, and call func to do the actual
  * load or store.
@@ -351,6 +354,7 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
 	}
 	return err;
 }
+#endif
 
 #ifdef CONFIG_ALTIVEC
 /* For Altivec/VMX, no need to worry about alignment */
@@ -1393,6 +1397,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		regs->gpr[rd] = byterev_4(val);
 		goto ldst_done;
 
+#ifdef CONFIG_PPC_CPU
 	case 535:	/* lfsx */
 	case 567:	/* lfsux */
 		if (!(regs->msr & MSR_FP))
@@ -1424,6 +1429,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		ea = xform_ea(instr, regs, u);
 		err = do_fp_store(rd, do_stfd, ea, 8, regs);
 		goto ldst_done;
+#endif
 
 #ifdef __powerpc64__
 	case 660:	/* stdbrx */
@@ -1534,6 +1540,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		} while (++rd < 32);
 		goto instr_done;
 
+#ifdef CONFIG_PPC_FPU
 	case 48:	/* lfs */
 	case 49:	/* lfsu */
 		if (!(regs->msr & MSR_FP))
@@ -1565,6 +1572,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		ea = dform_ea(instr, regs);
 		err = do_fp_store(rd, do_stfd, ea, 8, regs);
 		goto ldst_done;
+#endif
 
 #ifdef __powerpc64__
 	case 58:	/* ld[u], lwa */
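
Both new tails (.Lcsum_finish and .Lcopy_finish) reduce the 64-bit running sum the same way: addze folds in the last carry, rldicl r4,r0,32,0 rotates the accumulator by 32 bits, add r3,r4,r0 adds the two 32-bit halves (a carry between them lands in the high word), and srdi r3,r3,32 extracts that high word. A minimal standalone C sketch of the same fold; the helper name fold_csum64 is illustrative only and not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Illustration of the .Lcsum_finish / .Lcopy_finish fold (hypothetical helper). */
static uint32_t fold_csum64(uint64_t sum)
{
	/* rldicl r4,r0,32,0: rotate the 64-bit sum by 32 bits */
	uint64_t rotated = (sum << 32) | (sum >> 32);

	/* add r3,r4,r0; srdi r3,r3,32: the high word now holds lo + hi + carry */
	return (uint32_t)((rotated + sum) >> 32);
}

int main(void)
{
	/* high half 0x1 plus low half 0xfffffffe gives 0xffffffff */
	printf("%08x\n", (unsigned)fold_csum64(0x1fffffffeULL));
	return 0;
}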
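
The new checksum_wrappers_64.c gives the rest of the kernel one copy-and-checksum entry point with the fault handling kept inside the wrapper. A hypothetical caller sketch, assuming kernel context; copy_and_check is an invented name and the csum_fold call is shown only for illustration:

#include <asm/checksum.h>
#include <asm/uaccess.h>

/* Hypothetical caller, not part of the patch. */
static int copy_and_check(const void __user *uptr, void *kbuf, int len)
{
	int err = 0;
	__wsum sum;

	sum = csum_and_copy_from_user(uptr, kbuf, len, 0, &err);
	if (err)
		return err;	/* -EFAULT reported via the error pointer */

	return csum_fold(sum);	/* fold the 32-bit accumulator to the final 16 bits */
}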