diff options
Diffstat (limited to 'arch/arm/lib')
49 files changed, 5136 insertions, 0 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile new file mode 100644 index 00000000000..c0e65833ffc --- /dev/null +++ b/arch/arm/lib/Makefile @@ -0,0 +1,29 @@ +# +# linux/arch/arm/lib/Makefile +# +# Copyright (C) 1995-2000 Russell King +# + +lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ + csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ + copy_page.o delay.o findbit.o memchr.o memcpy.o \ + memset.o memzero.o setbit.o strncpy_from_user.o \ + strnlen_user.o strchr.o strrchr.o testchangebit.o \ + testclearbit.o testsetbit.o uaccess.o getuser.o \ + putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ + ucmpdi2.o udivdi3.o lib1funcs.o div64.o \ + io-readsb.o io-writesb.o io-readsl.o io-writesl.o + +ifeq ($(CONFIG_CPU_32v3),y) + lib-y += io-readsw-armv3.o io-writesw-armv3.o +else + lib-y += io-readsw-armv4.o io-writesw-armv4.o +endif + +lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o +lib-$(CONFIG_ARCH_CLPS7500) += io-acorn.o +lib-$(CONFIG_ARCH_L7200) += io-acorn.o +lib-$(CONFIG_ARCH_SHARK) += io-shark.o + +$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S +$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S diff --git a/arch/arm/lib/ashldi3.c b/arch/arm/lib/ashldi3.c new file mode 100644 index 00000000000..130f5a83966 --- /dev/null +++ b/arch/arm/lib/ashldi3.c @@ -0,0 +1,61 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +DItype +__ashldi3 (DItype u, word_type b) +{ + DIunion w; + word_type bm; + DIunion uu; + + if (b == 0) + return u; + + uu.ll = u; + + bm = (sizeof (SItype) * BITS_PER_UNIT) - b; + if (bm <= 0) + { + w.s.low = 0; + w.s.high = (USItype)uu.s.low << -bm; + } + else + { + USItype carries = (USItype)uu.s.low >> bm; + w.s.low = (USItype)uu.s.low << b; + w.s.high = ((USItype)uu.s.high << b) | carries; + } + + return w.ll; +} + diff --git a/arch/arm/lib/ashrdi3.c b/arch/arm/lib/ashrdi3.c new file mode 100644 index 00000000000..71625d218f8 --- /dev/null +++ b/arch/arm/lib/ashrdi3.c @@ -0,0 +1,61 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +DItype +__ashrdi3 (DItype u, word_type b) +{ + DIunion w; + word_type bm; + DIunion uu; + + if (b == 0) + return u; + + uu.ll = u; + + bm = (sizeof (SItype) * BITS_PER_UNIT) - b; + if (bm <= 0) + { + /* w.s.high = 1..1 or 0..0 */ + w.s.high = uu.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1); + w.s.low = uu.s.high >> -bm; + } + else + { + USItype carries = (USItype)uu.s.high << bm; + w.s.high = uu.s.high >> b; + w.s.low = ((USItype)uu.s.low >> b) | carries; + } + + return w.ll; +} diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S new file mode 100644 index 00000000000..68a21c0f3f5 --- /dev/null +++ b/arch/arm/lib/backtrace.S @@ -0,0 +1,157 @@ +/* + * linux/arch/arm/lib/backtrace.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/config.h> +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +@ fp is 0 or stack frame + +#define frame r4 +#define next r5 +#define save r6 +#define mask r7 +#define offset r8 + +ENTRY(__backtrace) + mov r1, #0x10 + mov r0, fp + +ENTRY(c_backtrace) + +#ifndef CONFIG_FRAME_POINTER + mov pc, lr +#else + + stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... + tst r1, #0x10 @ 26 or 32-bit? + moveq mask, #0xfc000003 + movne mask, #0 + tst mask, r0 + movne r0, #0 + movs frame, r0 +1: moveq r0, #-2 + LOADREGS(eqfd, sp!, {r4 - r8, pc}) + +2: stmfd sp!, {pc} @ calculate offset of PC in STMIA instruction + ldr r0, [sp], #4 + adr r1, 2b - 4 + sub offset, r0, r1 + +3: tst frame, mask @ Check for address exceptions... + bne 1b + +1001: ldr next, [frame, #-12] @ get fp +1002: ldr r2, [frame, #-4] @ get lr +1003: ldr r3, [frame, #0] @ get pc + sub save, r3, offset @ Correct PC for prefetching + bic save, save, mask +1004: ldr r1, [save, #0] @ get instruction at function + mov r1, r1, lsr #10 + ldr r3, .Ldsi+4 + teq r1, r3 + subeq save, save, #4 + mov r0, save + bic r1, r2, mask + bl dump_backtrace_entry + + ldr r0, [frame, #-8] @ get sp + sub r0, r0, #4 +1005: ldr r1, [save, #4] @ get instruction at function+4 + mov r3, r1, lsr #10 + ldr r2, .Ldsi+4 + teq r3, r2 @ Check for stmia sp!, {args} + addeq save, save, #4 @ next instruction + bleq .Ldumpstm + + sub r0, frame, #16 +1006: ldr r1, [save, #4] @ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction + mov r3, r1, lsr #10 + ldr r2, .Ldsi + teq r3, r2 + bleq .Ldumpstm + + /* + * A zero next framepointer means we're done. + */ + teq next, #0 + LOADREGS(eqfd, sp!, {r4 - r8, pc}) + + /* + * The next framepointer must be above the + * current framepointer. + */ + cmp next, frame + mov frame, next + bhi 3b + b 1007f + +/* + * Fixup for LDMDB + */ + .section .fixup,"ax" + .align 0 +1007: ldr r0, =.Lbad + mov r1, frame + bl printk + LOADREGS(fd, sp!, {r4 - r8, pc}) + .ltorg + .previous + + .section __ex_table,"a" + .align 3 + .long 1001b, 1007b + .long 1002b, 1007b + .long 1003b, 1007b + .long 1004b, 1007b + .long 1005b, 1007b + .long 1006b, 1007b + .previous + +#define instr r4 +#define reg r5 +#define stack r6 + +.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} + mov stack, r0 + mov instr, r1 + mov reg, #9 + mov r7, #0 +1: mov r3, #1 + tst instr, r3, lsl reg + beq 2f + add r7, r7, #1 + teq r7, #4 + moveq r7, #0 + moveq r3, #'\n' + movne r3, #' ' + ldr r2, [stack], #-4 + mov r1, reg + adr r0, .Lfp + bl printk +2: subs reg, reg, #1 + bpl 1b + teq r7, #0 + adrne r0, .Lcr + blne printk + mov r0, stack + LOADREGS(fd, sp!, {instr, reg, stack, r7, pc}) + +.Lfp: .asciz " r%d = %08X%c" +.Lcr: .asciz "\n" +.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" + .align +.Ldsi: .word 0x00e92dd8 >> 2 + .word 0x00e92d00 >> 2 + +#endif diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S new file mode 100644 index 00000000000..3af45cab70e --- /dev/null +++ b/arch/arm/lib/changebit.S @@ -0,0 +1,28 @@ +/* + * linux/arch/arm/lib/changebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* Purpose : Function to change a bit + * Prototype: int change_bit(int bit, void *addr) + */ +ENTRY(_change_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_change_bit_le) + and r2, r0, #7 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip, r2 + ldrb r2, [r1, r0, lsr #3] + eor r2, r2, r3 + strb r2, [r1, r0, lsr #3] + restore_irqs ip + RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S new file mode 100644 index 00000000000..069a2ce413f --- /dev/null +++ b/arch/arm/lib/clearbit.S @@ -0,0 +1,31 @@ +/* + * linux/arch/arm/lib/clearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Function to clear a bit + * Prototype: int clear_bit(int bit, void *addr) + */ +ENTRY(_clear_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_clear_bit_le) + and r2, r0, #7 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip, r2 + ldrb r2, [r1, r0, lsr #3] + bic r2, r2, r3 + strb r2, [r1, r0, lsr #3] + restore_irqs ip + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S new file mode 100644 index 00000000000..4c38abdbe49 --- /dev/null +++ b/arch/arm/lib/copy_page.S @@ -0,0 +1,46 @@ +/* + * linux/arch/arm/lib/copypage.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/constants.h> + +#define COPY_COUNT (PAGE_SZ/64 PLD( -1 )) + + .text + .align 5 +/* + * StrongARM optimised copy_page routine + * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s) + * Note that we probably achieve closer to the 100MB/s target with + * the core clock switching. + */ +ENTRY(copy_page) + stmfd sp!, {r4, lr} @ 2 + PLD( pld [r1, #0] ) + PLD( pld [r1, #32] ) + mov r2, #COPY_COUNT @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: PLD( pld [r1, #64] ) + PLD( pld [r1, #96] ) +2: stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmgtia r1!, {r3, r4, ip, lr} @ 4 + bgt 1b @ 1 + PLD( ldmeqia r1!, {r3, r4, ip, lr} ) + PLD( beq 2b ) + LOADREGS(fd, sp!, {r4, pc}) @ 3 diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S new file mode 100644 index 00000000000..7065a20ee8a --- /dev/null +++ b/arch/arm/lib/csumipv6.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm/lib/csumipv6.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +ENTRY(__csum_ipv6_magic) + str lr, [sp, #-4]! + adds ip, r2, r3 + ldmia r1, {r1 - r3, lr} + adcs ip, ip, r1 + adcs ip, ip, r2 + adcs ip, ip, r3 + adcs ip, ip, lr + ldmia r0, {r0 - r3} + adcs r0, ip, r0 + adcs r0, r0, r1 + adcs r0, r0, r2 + ldr r2, [sp, #4] + adcs r0, r0, r3 + adcs r0, r0, r2 + adcs r0, r0, #0 + LOADREGS(fd, sp!, {pc}) + diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S new file mode 100644 index 00000000000..cb5e3708f11 --- /dev/null +++ b/arch/arm/lib/csumpartial.S @@ -0,0 +1,137 @@ +/* + * linux/arch/arm/lib/csumpartial.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* + * Function: __u32 csum_partial(const char *src, int len, __u32 sum) + * Params : r0 = buffer, r1 = len, r2 = checksum + * Returns : r0 = new checksum + */ + +buf .req r0 +len .req r1 +sum .req r2 +td0 .req r3 +td1 .req r4 @ save before use +td2 .req r5 @ save before use +td3 .req lr + +.zero: mov r0, sum + add sp, sp, #4 + ldr pc, [sp], #4 + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.less8: teq len, #0 @ check for zero count + beq .zero + + /* we must have at least one byte. */ + tst buf, #1 @ odd address? + ldrneb td0, [buf], #1 + subne len, len, #1 + adcnes sum, sum, td0, put_byte_1 + +.less4: tst len, #6 + beq .less8_byte + + /* we are now half-word aligned */ + +.less8_wordlp: +#if __LINUX_ARM_ARCH__ >= 4 + ldrh td0, [buf], #2 + sub len, len, #2 +#else + ldrb td0, [buf], #1 + ldrb td3, [buf], #1 + sub len, len, #2 +#ifndef __ARMEB__ + orr td0, td0, td3, lsl #8 +#else + orr td0, td3, td0, lsl #8 +#endif +#endif + adcs sum, sum, td0 + tst len, #6 + bne .less8_wordlp + +.less8_byte: tst len, #1 @ odd number of bytes + ldrneb td0, [buf], #1 @ include last byte + adcnes sum, sum, td0, put_byte_0 @ update checksum + +.done: adc r0, sum, #0 @ collect up the last carry + ldr td0, [sp], #4 + tst td0, #1 @ check buffer alignment + movne r0, r0, ror #8 @ rotate checksum by 8 bits + ldr pc, [sp], #4 @ return + +.not_aligned: tst buf, #1 @ odd address + ldrneb td0, [buf], #1 @ make even + subne len, len, #1 + adcnes sum, sum, td0, put_byte_1 @ update checksum + + tst buf, #2 @ 32-bit aligned? +#if __LINUX_ARM_ARCH__ >= 4 + ldrneh td0, [buf], #2 @ make 32-bit aligned + subne len, len, #2 +#else + ldrneb td0, [buf], #1 + ldrneb ip, [buf], #1 + subne len, len, #2 +#ifndef __ARMEB__ + orrne td0, td0, ip, lsl #8 +#else + orrne td0, ip, td0, lsl #8 +#endif +#endif + adcnes sum, sum, td0 @ update checksum + mov pc, lr + +ENTRY(csum_partial) + stmfd sp!, {buf, lr} + cmp len, #8 @ Ensure that we have at least + blo .less8 @ 8 bytes to copy. + + adds sum, sum, #0 @ C = 0 + tst buf, #3 @ Test destination alignment + blne .not_aligned @ aligh destination, return here + +1: bics ip, len, #31 + beq 3f + + stmfd sp!, {r4 - r5} +2: ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + sub ip, ip, #32 + teq ip, #0 + bne 2b + ldmfd sp!, {r4 - r5} + +3: tst len, #0x1c @ should not change C + beq .less4 + +4: ldr td0, [buf], #4 + sub len, len, #4 + adcs sum, sum, td0 + tst len, #0x1c + bne 4b + b .less4 diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S new file mode 100644 index 00000000000..990ee63b246 --- /dev/null +++ b/arch/arm/lib/csumpartialcopy.S @@ -0,0 +1,52 @@ +/* + * linux/arch/arm/lib/csumpartialcopy.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum) + * Params : r0 = src, r1 = dst, r2 = len, r3 = checksum + * Returns : r0 = new checksum + */ + + .macro save_regs + stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc} + .endm + + .macro load_regs,flags + LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc}) + .endm + + .macro load1b, reg1 + ldrb \reg1, [r0], #1 + .endm + + .macro load2b, reg1, reg2 + ldrb \reg1, [r0], #1 + ldrb \reg2, [r0], #1 + .endm + + .macro load1l, reg1 + ldr \reg1, [r0], #4 + .endm + + .macro load2l, reg1, reg2 + ldr \reg1, [r0], #4 + ldr \reg2, [r0], #4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldmia r0!, {\reg1, \reg2, \reg3, \reg4} + .endm + +#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) + +#include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S new file mode 100644 index 00000000000..d3a2f4667db --- /dev/null +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -0,0 +1,331 @@ +/* + * linux/arch/arm/lib/csumpartialcopygeneric.S + * + * Copyright (C) 1995-2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * unsigned int + * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) + * r0 = src, r1 = dst, r2 = len, r3 = sum + * Returns : r0 = checksum + * + * Note that 'tst' and 'teq' preserve the carry flag. + */ + +src .req r0 +dst .req r1 +len .req r2 +sum .req r3 + +.zero: mov r0, sum + load_regs ea + + /* + * Align an unaligned destination pointer. We know that + * we have >= 8 bytes here, so we don't need to check + * the length. Note that the source pointer hasn't been + * aligned yet. + */ +.dst_unaligned: tst dst, #1 + beq .dst_16bit + + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst dst, #2 + moveq pc, lr @ dst is now 32bit aligned + +.dst_16bit: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 + mov pc, lr @ dst is now 32bit aligned + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.less8: teq len, #0 @ check for zero count + beq .zero + + /* we must have at least one byte. */ + tst dst, #1 @ dst 16-bit aligned + beq .less8_aligned + + /* Align dst */ + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst len, #6 + beq .less8_byteonly + +1: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 +.less8_aligned: tst len, #6 + bne 1b +.less8_byteonly: + tst len, #1 + beq .done + load1b r8 + adcs sum, sum, r8, put_byte_0 @ update checksum + strb r8, [dst], #1 + b .done + +FN_ENTRY + mov ip, sp + save_regs + sub fp, ip, #4 + + cmp len, #8 @ Ensure that we have at least + blo .less8 @ 8 bytes to copy. + + adds sum, sum, #0 @ C = 0 + tst dst, #3 @ Test destination alignment + blne .dst_unaligned @ align destination, return here + + /* + * Ok, the dst pointer is now 32bit aligned, and we know + * that we must have more than 4 bytes to copy. Note + * that C contains the carry from the dst alignment above. + */ + + tst src, #3 @ Test source alignment + bne .src_not_aligned + + /* Routine for src & dst aligned */ + + bics ip, len, #15 + beq 2f + +1: load4l r4, r5, r6, r7 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + sub ip, ip, #16 + teq ip, #0 + bne 1b + +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r4, r5 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + tst ip, #4 + beq 4f + +3: load1l r4 + str r4, [dst], #4 + adcs sum, sum, r4 + +4: ands len, len, #3 + beq .done + load1l r4 + tst len, #2 + mov r5, r4, get_byte_0 + beq .exit + adcs sum, sum, r4, push #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 +.exit: tst len, #1 + strneb r5, [dst], #1 + andne r5, r5, #255 + adcnes sum, sum, r5, put_byte_0 + + /* + * If the dst pointer was not 16-bit aligned, we + * need to rotate the checksum here to get around + * the inefficient byte manipulations in the + * architecture independent code. + */ +.done: adc r0, sum, #0 + ldr sum, [sp, #0] @ dst + tst sum, #1 + movne r0, r0, ror #8 + load_regs ea + +.src_not_aligned: + adc sum, sum, #0 @ include C from dst alignment + and ip, src, #3 + bic src, src, #3 + load1l r5 + cmp ip, #2 + beq .src2_aligned + bhi .src3_aligned + mov r4, r5, pull #8 @ C = 0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + mov r7, r7, pull #8 + orr r7, r7, r8, push #24 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #8 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #8 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #24 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #8 +4: ands len, len, #3 + beq .done + mov r5, r4, get_byte_0 + tst len, #2 + beq .exit + adcs sum, sum, r4, push #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 + b .exit + +.src2_aligned: mov r4, r5, pull #16 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + mov r7, r7, pull #16 + orr r7, r7, r8, push #16 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #16 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #16 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #16 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #16 +4: ands len, len, #3 + beq .done + mov r5, r4, get_byte_0 + tst len, #2 + beq .exit + adcs sum, sum, r4 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + tst len, #1 + beq .done + load1b r5 + b .exit + +.src3_aligned: mov r4, r5, pull #24 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + mov r7, r7, pull #24 + orr r7, r7, r8, push #8 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #24 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #24 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #8 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #24 +4: ands len, len, #3 + beq .done + mov r5, r4, get_byte_0 + tst len, #2 + beq .exit + strb r5, [dst], #1 + adcs sum, sum, r4 + load1l r4 + mov r5, r4, get_byte_0 + strb r5, [dst], #1 + adcs sum, sum, r4, push #24 + mov r5, r4, get_byte_1 + b .exit diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S new file mode 100644 index 00000000000..46a2dc962e9 --- /dev/null +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -0,0 +1,104 @@ +/* + * linux/arch/arm/lib/csumpartialcopyuser.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/constants.h> + + .text + + .macro save_regs + stmfd sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc} + .endm + + .macro load_regs,flags + ldm\flags fp, {r1, r2, r4-r8, fp, sp, pc} + .endm + + .macro load1b, reg1 +9999: ldrbt \reg1, [r0], $1 + .section __ex_table, "a" + .align 3 + .long 9999b, 6001f + .previous + .endm + + .macro load2b, reg1, reg2 +9999: ldrbt \reg1, [r0], $1 +9998: ldrbt \reg2, [r0], $1 + .section __ex_table, "a" + .long 9999b, 6001f + .long 9998b, 6001f + .previous + .endm + + .macro load1l, reg1 +9999: ldrt \reg1, [r0], $4 + .section __ex_table, "a" + .align 3 + .long 9999b, 6001f + .previous + .endm + + .macro load2l, reg1, reg2 +9999: ldrt \reg1, [r0], $4 +9998: ldrt \reg2, [r0], $4 + .section __ex_table, "a" + .long 9999b, 6001f + .long 9998b, 6001f + .previous + .endm + + .macro load4l, reg1, reg2, reg3, reg4 +9999: ldrt \reg1, [r0], $4 +9998: ldrt \reg2, [r0], $4 +9997: ldrt \reg3, [r0], $4 +9996: ldrt \reg4, [r0], $4 + .section __ex_table, "a" + .long 9999b, 6001f + .long 9998b, 6001f + .long 9997b, 6001f + .long 9996b, 6001f + .previous + .endm + +/* + * unsigned int + * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr) + * r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr + * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT + */ + +#define FN_ENTRY ENTRY(csum_partial_copy_from_user) + +#include "csumpartialcopygeneric.S" + +/* + * FIXME: minor buglet here + * We don't return the checksum for the data present in the buffer. To do + * so properly, we would have to add in whatever registers were loaded before + * the fault, which, with the current asm above is not predictable. + */ + .section .fixup,"ax" + .align 4 +6001: mov r4, #-EFAULT + ldr r5, [fp, #4] @ *err_ptr + str r4, [r5] + ldmia sp, {r1, r2} @ retrieve dst, len + add r2, r2, r1 + mov r0, #0 @ zero the buffer +6002: teq r2, r1 + strneb r0, [r1], #1 + bne 6002b + load_regs ea + .previous diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S new file mode 100644 index 00000000000..3c7f7e675dd --- /dev/null +++ b/arch/arm/lib/delay.S @@ -0,0 +1,58 @@ +/* + * linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +LC0: .word loops_per_jiffy + +/* + * 0 <= r0 <= 2000 + */ +ENTRY(__udelay) + mov r2, #0x6800 + orr r2, r2, #0x00db + mul r0, r2, r0 +ENTRY(__const_udelay) @ 0 <= r0 <= 0x01ffffff + ldr r2, LC0 + ldr r2, [r2] @ max = 0x0fffffff + mov r0, r0, lsr #11 @ max = 0x00003fff + mov r2, r2, lsr #11 @ max = 0x0003ffff + mul r0, r2, r0 @ max = 2^32-1 + movs r0, r0, lsr #6 + RETINSTR(moveq,pc,lr) + +/* + * loops = (r0 * 0x10c6 * 100 * loops_per_jiffy) / 2^32 + * + * Oh, if only we had a cycle counter... + */ + +@ Delay routine +ENTRY(__delay) + subs r0, r0, #1 +#if 0 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 +#endif + bhi __delay + RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 00000000000..ec9a1cd6176 --- /dev/null +++ b/arch/arm/lib/div64.S @@ -0,0 +1,200 @@ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +ENTRY(__do_div64) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subcss xh, xh, yl + movnes ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + movlo pc, lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + mov pc, lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + moveq pc, lr + + @ We still have remainer bits in the low part. Bring them up. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + mov pc, lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + orr yl, yl, xh, lsl ip + mov xh, xl, lsl ip + mov xh, xh, lsr ip + mov pc, lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + moveq pc, lr + + @ Division by 0: + str lr, [sp, #-4]! + bl __div0 + + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #4 + diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S new file mode 100644 index 00000000000..fb7b602a6f7 --- /dev/null +++ b/arch/arm/lib/ecard.S @@ -0,0 +1,45 @@ +/* + * linux/arch/arm/lib/ecard.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +#define CPSR2SPSR(rt) \ + mrs rt, cpsr; \ + msr spsr_cxsf, rt + +@ Purpose: call an expansion card loader to read bytes. +@ Proto : char read_loader(int offset, char *card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_read) + stmfd sp!, {r4 - r12, lr} + mov r11, r1 + mov r1, r0 + CPSR2SPSR(r0) + mov lr, pc + mov pc, r2 + LOADREGS(fd, sp!, {r4 - r12, pc}) + +@ Purpose: call an expansion card loader to reset the card +@ Proto : void read_loader(int card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_reset) + stmfd sp!, {r4 - r12, lr} + mov r11, r0 + CPSR2SPSR(r0) + mov lr, pc + add pc, r1, #8 + LOADREGS(fd, sp!, {r4 - r12, pc}) + diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S new file mode 100644 index 00000000000..f055d56ea68 --- /dev/null +++ b/arch/arm/lib/findbit.S @@ -0,0 +1,168 @@ +/* + * linux/arch/arm/lib/findbit.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16th March 2001 - John Ripley <jripley@sonicblue.com> + * Fixed so that "size" is an exclusive not an inclusive quantity. + * All users of these functions expect exclusive sizes, and may + * also call with zero size. + * Reworked by rmk. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Find a 'zero' bit + * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); + */ +ENTRY(_find_first_zero_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: ldrb r3, [r0, r2, lsr #3] + eors r3, r3, #0xff @ invert bits + bne .found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + RETINSTR(mov,pc,lr) + +/* + * Purpose : Find next 'zero' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_zero_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ldrb r3, [r0, r2, lsr #3] + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +/* + * Purpose : Find a 'one' bit + * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); + */ +ENTRY(_find_first_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: ldrb r3, [r0, r2, lsr #3] + movs r3, r3 + bne .found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + RETINSTR(mov,pc,lr) + +/* + * Purpose : Find next 'one' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ldrb r3, [r0, r2, lsr #3] + movs r3, r3, lsr ip @ shift off unused bits + bne .found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +#ifdef __ARMEB__ + +ENTRY(_find_first_zero_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + eors r3, r3, #0xff @ invert bits + bne .found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + RETINSTR(mov,pc,lr) + +ENTRY(_find_next_zero_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +ENTRY(_find_first_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + movs r3, r3 + bne .found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + RETINSTR(mov,pc,lr) + +ENTRY(_find_next_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + movs r3, r3, lsr ip @ shift off unused bits + bne .found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +#endif + +/* + * One or more bits in the LSB of r3 are assumed to be set. + */ +.found: +#if __LINUX_ARM_ARCH__ >= 5 + rsb r1, r3, #0 + and r3, r3, r1 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 +#else + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 +#endif + RETINSTR(mov,pc,lr) + diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S new file mode 100644 index 00000000000..617150b1bae --- /dev/null +++ b/arch/arm/lib/floppydma.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm/lib/floppydma.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + + .global floppy_fiqin_end +ENTRY(floppy_fiqin_start) + subs r9, r9, #1 + ldrgtb r12, [r11, #-4] + ldrleb r12, [r11], #0 + strb r12, [r10], #1 + subs pc, lr, #4 +floppy_fiqin_end: + + .global floppy_fiqout_end +ENTRY(floppy_fiqout_start) + subs r9, r9, #1 + ldrgeb r12, [r10], #1 + movlt r12, #0 + strleb r12, [r11], #0 + subles pc, lr, #4 + strb r12, [r11, #-4] + subs pc, lr, #4 +floppy_fiqout_end: diff --git a/arch/arm/lib/gcclib.h b/arch/arm/lib/gcclib.h new file mode 100644 index 00000000000..65314a3d9e2 --- /dev/null +++ b/arch/arm/lib/gcclib.h @@ -0,0 +1,25 @@ +/* gcclib.h -- definitions for various functions 'borrowed' from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#define BITS_PER_UNIT 8 +#define SI_TYPE_SIZE (sizeof (SItype) * BITS_PER_UNIT) + +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef int word_type __attribute__ ((mode (__word__))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); + +#ifdef __ARMEB__ + struct DIstruct {SItype high, low;}; +#else + struct DIstruct {SItype low, high;}; +#endif + +typedef union +{ + struct DIstruct s; + DItype ll; +} DIunion; + diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S new file mode 100644 index 00000000000..64aa6f4fe5e --- /dev/null +++ b/arch/arm/lib/getuser.S @@ -0,0 +1,78 @@ +/* + * linux/arch/arm/lib/getuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make them more + * efficient, especially as they return an error value in addition to + * the "real" return value. + * + * __get_user_X + * + * Inputs: r0 contains the address + * Outputs: r0 is the error code + * r2, r3 contains the zero-extended value + * lr corrupted + * + * No other registers must be altered. (see include/asm-arm/uaccess.h + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000. + * Note also that it is intended that __get_user_bad is not global. + */ +#include <asm/constants.h> +#include <asm/thread_info.h> +#include <asm/errno.h> + + .global __get_user_1 +__get_user_1: +1: ldrbt r2, [r0] + mov r0, #0 + mov pc, lr + + .global __get_user_2 +__get_user_2: +2: ldrbt r2, [r0], #1 +3: ldrbt r3, [r0] +#ifndef __ARMEB__ + orr r2, r2, r3, lsl #8 +#else + orr r2, r3, r2, lsl #8 +#endif + mov r0, #0 + mov pc, lr + + .global __get_user_4 +__get_user_4: +4: ldrt r2, [r0] + mov r0, #0 + mov pc, lr + + .global __get_user_8 +__get_user_8: +5: ldrt r2, [r0], #4 +6: ldrt r3, [r0] + mov r0, #0 + mov pc, lr + +__get_user_bad_8: + mov r3, #0 +__get_user_bad: + mov r2, #0 + mov r0, #-EFAULT + mov pc, lr + +.section __ex_table, "a" + .long 1b, __get_user_bad + .long 2b, __get_user_bad + .long 3b, __get_user_bad + .long 4b, __get_user_bad + .long 5b, __get_user_bad_8 + .long 6b, __get_user_bad_8 +.previous diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S new file mode 100644 index 00000000000..3aacd01d40e --- /dev/null +++ b/arch/arm/lib/io-acorn.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm/lib/io-acorn.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + + .text + .align + +.iosl_warning: + .ascii "<4>insl/outsl not implemented, called from %08lX\0" + .align + +/* + * These make no sense on Acorn machines. + * Print a warning message. + */ +ENTRY(insl) +ENTRY(outsl) + adr r0, .iosl_warning + mov r1, lr + b printk diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S new file mode 100644 index 00000000000..081ef749298 --- /dev/null +++ b/arch/arm/lib/io-readsb.S @@ -0,0 +1,122 @@ +/* + * linux/arch/arm/lib/io-readsb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +.insb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1], #1 + subs r2, r2, ip + bne .insb_aligned + +ENTRY(__raw_readsb) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne .insb_align + +.insb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .insb_no_16 + +.insb_16_lp: ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + ldrb r5, [r0] + orr r4, r4, r6, put_byte_2 + ldrb r6, [r0] + orr r4, r4, ip, put_byte_3 + ldrb ip, [r0] + mov r5, r5, put_byte_0 + ldrb lr, [r0] + orr r5, r5, r6, put_byte_1 + ldrb r6, [r0] + orr r5, r5, ip, put_byte_2 + ldrb ip, [r0] + orr r5, r5, lr, put_byte_3 + ldrb lr, [r0] + mov r6, r6, put_byte_0 + orr r6, r6, ip, put_byte_1 + ldrb ip, [r0] + orr r6, r6, lr, put_byte_2 + orr r6, r6, ip, put_byte_3 + stmia r1!, {r3 - r6} + + subs r2, r2, #16 + bpl .insb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + +.insb_no_16: tst r2, #8 + beq .insb_no_8 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + orr r4, r4, r6, put_byte_2 + orr r4, r4, ip, put_byte_3 + stmia r1!, {r3, r4} + +.insb_no_8: tst r2, #4 + beq .insb_no_4 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + ldrb r6, [r0] + mov r3, r3, put_byte_0 + orr r3, r3, r4, put_byte_1 + orr r3, r3, r5, put_byte_2 + orr r3, r3, r6, put_byte_3 + str r3, [r1], #4 + +.insb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + + cmp r2, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1] + + LOADREGS(fd, sp!, {r4 - r6, pc}) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S new file mode 100644 index 00000000000..75a9121cb23 --- /dev/null +++ b/arch/arm/lib/io-readsl.S @@ -0,0 +1,78 @@ +/* + * linux/arch/arm/lib/io-readsl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +ENTRY(__raw_readsl) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldr r3, [r0, #0] + ldr r4, [r0, #0] + ldr ip, [r0, #0] + ldr lr, [r0, #0] + subs r2, r2, #4 + stmia r1!, {r3, r4, ip, lr} + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldrcs r3, [r0, #0] + ldrcs ip, [r0, #0] + stmcsia r1!, {r3, ip} + ldrne r3, [r0, #0] + strne r3, [r1, #0] + mov pc, lr + +3: ldr r3, [r0] + cmp ip, #2 + mov ip, r3, get_byte_0 + strb ip, [r1], #1 + bgt 6f + mov ip, r3, get_byte_1 + strb ip, [r1], #1 + beq 5f + mov ip, r3, get_byte_2 + strb ip, [r1], #1 + +4: subs r2, r2, #1 + mov ip, r3, pull #24 + ldrne r3, [r0] + orrne ip, ip, r3, push #8 + strne ip, [r1], #4 + bne 4b + b 8f + +5: subs r2, r2, #1 + mov ip, r3, pull #16 + ldrne r3, [r0] + orrne ip, ip, r3, push #16 + strne ip, [r1], #4 + bne 5b + b 7f + +6: subs r2, r2, #1 + mov ip, r3, pull #8 + ldrne r3, [r0] + orrne ip, ip, r3, push #24 + strne ip, [r1], #4 + bne 6b + + mov r3, ip, get_byte_2 + strb r3, [r1, #2] +7: mov r3, ip, get_byte_1 + strb r3, [r1, #1] +8: mov r3, ip, get_byte_0 + strb r3, [r1, #0] + mov pc, lr diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S new file mode 100644 index 00000000000..476cf7f8a63 --- /dev/null +++ b/arch/arm/lib/io-readsw-armv3.S @@ -0,0 +1,107 @@ +/* + * linux/arch/arm/lib/io-readsw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +.insw_bad_alignment: + adr r0, .insw_bad_align_msg + mov r2, lr + b panic +.insw_bad_align_msg: + .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.insw_align: tst r1, #1 + bne .insw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + RETINSTR(moveq, pc, lr) + +ENTRY(__raw_readsw) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + tst r1, #3 + bne .insw_align + +.insw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_insw_8 + +.insw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + + subs r2, r2, #8 + bpl .insw_8_lp + + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_insw_8: tst r2, #4 + beq .no_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.no_insw_4: tst r2, #2 + beq .no_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.no_insw_2: tst r2, #1 + ldrne r3, [r0] + strneb r3, [r1], #1 + movne r3, r3, lsr #8 + strneb r3, [r1] + + LOADREGS(fd, sp!, {r4, r5, r6, pc}) + + diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S new file mode 100644 index 00000000000..c92b66ecbe8 --- /dev/null +++ b/arch/arm/lib/io-readsw-armv4.S @@ -0,0 +1,130 @@ +/* + * linux/arch/arm/lib/io-readsw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro pack, rd, hw1, hw2 +#ifndef __ARMEB__ + orr \rd, \hw1, \hw2, lsl #16 +#else + orr \rd, \hw2, \hw1, lsl #16 +#endif + .endm + +.insw_align: movs ip, r1, lsl #31 + bne .insw_noalign + ldrh ip, [r0] + sub r2, r2, #1 + strh ip, [r1], #2 + +ENTRY(__raw_readsw) + teq r2, #0 + moveq pc, lr + tst r1, #3 + bne .insw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .no_insw_8 + +.insw_8_lp: ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh r5, [r0] + pack r4, r4, r5 + + ldrh r5, [r0] + ldrh ip, [r0] + pack r5, r5, ip + + ldrh ip, [r0] + ldrh lr, [r0] + pack ip, ip, lr + + subs r2, r2, #8 + stmia r1!, {r3 - r5, ip} + bpl .insw_8_lp + +.no_insw_8: tst r2, #4 + beq .no_insw_4 + + ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh ip, [r0] + pack r4, r4, ip + + stmia r1!, {r3, r4} + +.no_insw_4: movs r2, r2, lsl #31 + bcc .no_insw_2 + + ldrh r3, [r0] + ldrh ip, [r0] + pack r3, r3, ip + str r3, [r1], #4 + +.no_insw_2: ldrneh r3, [r0] + strneh r3, [r1] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define _BE_ONLY_(code...) code +#define _LE_ONLY_(code...) +#define push_hbyte0 lsr #8 +#define pull_hbyte1 lsl #24 +#else +#define _BE_ONLY_(code...) +#define _LE_ONLY_(code...) code +#define push_hbyte0 lsl #24 +#define pull_hbyte1 lsr #8 +#endif + +.insw_noalign: stmfd sp!, {r4, lr} + ldrccb ip, [r1, #-1]! + bcc 1f + + ldrh ip, [r0] + sub r2, r2, #1 + _BE_ONLY_( mov ip, ip, ror #8 ) + strb ip, [r1], #1 + _LE_ONLY_( mov ip, ip, lsr #8 ) + _BE_ONLY_( mov ip, ip, lsr #24 ) + +1: subs r2, r2, #2 + bmi 3f + _BE_ONLY_( mov ip, ip, lsl #24 ) + +2: ldrh r3, [r0] + ldrh r4, [r0] + subs r2, r2, #2 + orr ip, ip, r3, lsl #8 + orr ip, ip, r4, push_hbyte0 + str ip, [r1], #4 + mov ip, r4, pull_hbyte1 + bpl 2b + + _BE_ONLY_( mov ip, ip, lsr #24 ) + +3: tst r2, #1 + strb ip, [r1], #1 + ldrneh ip, [r0] + _BE_ONLY_( movne ip, ip, ror #8 ) + strneb ip, [r1], #1 + _LE_ONLY_( movne ip, ip, lsr #8 ) + _BE_ONLY_( movne ip, ip, lsr #24 ) + strneb ip, [r1] + ldmfd sp!, {r4, pc} diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c new file mode 100644 index 00000000000..108d4573e97 --- /dev/null +++ b/arch/arm/lib/io-shark.c @@ -0,0 +1,83 @@ +/* + * linux/arch/arm/lib/io-shark.c + * + * by Alexander Schulz + * + * derived from: + * linux/arch/arm/lib/io-ebsa.S + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> + +#include <asm/io.h> + +void print_warning(void) +{ + printk(KERN_WARNING "ins?/outs? not implemented on this architecture\n"); +} + +void insl(unsigned int port, void *to, int len) +{ + print_warning(); +} + +void insb(unsigned int port, void *to, int len) +{ + print_warning(); +} + +void outsl(unsigned int port, const void *from, int len) +{ + print_warning(); +} + +void outsb(unsigned int port, const void *from, int len) +{ + print_warning(); +} + +/* these should be in assembler again */ + +/* + * Purpose: read a block of data from a hardware register to memory. + * Proto : insw(int from_port, void *to, int len_in_words); + * Proto : inswb(int from_port, void *to, int len_in_bytes); + * Notes : increment to + */ + +void insw(unsigned int port, void *to, int len) +{ + int i; + + for (i = 0; i < len; i++) + ((unsigned short *) to)[i] = inw(port); +} + +void inswb(unsigned int port, void *to, int len) +{ + insw(port, to, len >> 2); +} + +/* + * Purpose: write a block of data from memory to a hardware register. + * Proto : outsw(int to_reg, void *from, int len_in_words); + * Proto : outswb(int to_reg, void *from, int len_in_bytes); + * Notes : increments from + */ + +void outsw(unsigned int port, const void *from, int len) +{ + int i; + + for (i = 0; i < len; i++) + outw(((unsigned short *) from)[i], port); +} + +void outswb(unsigned int port, const void *from, int len) +{ + outsw(port, from, len >> 2); +} diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S new file mode 100644 index 00000000000..70b2561bdb0 --- /dev/null +++ b/arch/arm/lib/io-writesb.S @@ -0,0 +1,92 @@ +/* + * linux/arch/arm/lib/io-writesb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro outword, rd +#ifndef __ARMEB__ + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] +#else + mov lr, \rd, lsr #24 + strb lr, [r0] + mov lr, \rd, lsr #16 + strb lr, [r0] + mov lr, \rd, lsr #8 + strb lr, [r0] + strb \rd, [r0] +#endif + .endm + +.outsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1], #1 + strgtb r3, [r0] + subs r2, r2, ip + bne .outsb_aligned + +ENTRY(__raw_writesb) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne .outsb_align + +.outsb_aligned: stmfd sp!, {r4, r5, lr} + + subs r2, r2, #16 + bmi .outsb_no_16 + +.outsb_16_lp: ldmia r1!, {r3, r4, r5, ip} + outword r3 + outword r4 + outword r5 + outword ip + subs r2, r2, #16 + bpl .outsb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4, r5, pc}) + +.outsb_no_16: tst r2, #8 + beq .outsb_no_8 + + ldmia r1!, {r3, r4} + outword r3 + outword r4 + +.outsb_no_8: tst r2, #4 + beq .outsb_no_4 + + ldr r3, [r1], #4 + outword r3 + +.outsb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4, r5, pc}) + + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1] + strgtb r3, [r0] + + LOADREGS(fd, sp!, {r4, r5, pc}) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S new file mode 100644 index 00000000000..f8f14dd227c --- /dev/null +++ b/arch/arm/lib/io-writesl.S @@ -0,0 +1,66 @@ +/* + * linux/arch/arm/lib/io-writesl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +ENTRY(__raw_writesl) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldmia r1!, {r3, r4, ip, lr} + subs r2, r2, #4 + str r3, [r0, #0] + str r4, [r0, #0] + str ip, [r0, #0] + str lr, [r0, #0] + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldmcsia r1!, {r3, ip} + strcs r3, [r0, #0] + ldrne r3, [r1, #0] + strcs ip, [r0, #0] + strne r3, [r0, #0] + mov pc, lr + +3: bic r1, r1, #3 + ldr r3, [r1], #4 + cmp ip, #2 + blt 5f + bgt 6f + +4: mov ip, r3, pull #16 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, push #16 + str ip, [r0] + bne 4b + mov pc, lr + +5: mov ip, r3, pull #8 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, push #24 + str ip, [r0] + bne 5b + mov pc, lr + +6: mov ip, r3, pull #24 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, push #8 + str ip, [r0] + bne 6b + mov pc, lr diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S new file mode 100644 index 00000000000..950e7e310f1 --- /dev/null +++ b/arch/arm/lib/io-writesw-armv3.S @@ -0,0 +1,127 @@ +/* + * linux/arch/arm/lib/io-writesw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +.outsw_bad_alignment: + adr r0, .outsw_bad_align_msg + mov r2, lr + b panic +.outsw_bad_align_msg: + .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.outsw_align: tst r1, #1 + bne .outsw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + RETINSTR(moveq, pc, lr) + +ENTRY(__raw_writesw) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + tst r1, #3 + bne .outsw_align + +.outsw_aligned: stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_outsw_8 + +.outsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .outsw_8_lp + + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_outsw_8: tst r2, #4 + beq .no_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_4: tst r2, #2 + beq .no_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + LOADREGS(fd, sp!, {r4, r5, r6, pc}) diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S new file mode 100644 index 00000000000..6d1d7c27806 --- /dev/null +++ b/arch/arm/lib/io-writesw-armv4.S @@ -0,0 +1,95 @@ +/* + * linux/arch/arm/lib/io-writesw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro outword, rd +#ifndef __ARMEB__ + strh \rd, [r0] + mov \rd, \rd, lsr #16 + strh \rd, [r0] +#else + mov lr, \rd, lsr #16 + strh lr, [r0] + strh \rd, [r0] +#endif + .endm + +.outsw_align: movs ip, r1, lsl #31 + bne .outsw_noalign + + ldrh r3, [r1], #2 + sub r2, r2, #1 + strh r3, [r0] + +ENTRY(__raw_writesw) + teq r2, #0 + moveq pc, lr + ands r3, r1, #3 + bne .outsw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .no_outsw_8 + +.outsw_8_lp: ldmia r1!, {r3, r4, r5, ip} + subs r2, r2, #8 + outword r3 + outword r4 + outword r5 + outword ip + bpl .outsw_8_lp + +.no_outsw_8: tst r2, #4 + beq .no_outsw_4 + + ldmia r1!, {r3, ip} + outword r3 + outword ip + +.no_outsw_4: movs r2, r2, lsl #31 + bcc .no_outsw_2 + + ldr r3, [r1], #4 + outword r3 + +.no_outsw_2: ldrneh r3, [r1] + strneh r3, [r0] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define pull_hbyte0 lsl #8 +#define push_hbyte1 lsr #24 +#else +#define pull_hbyte0 lsr #24 +#define push_hbyte1 lsl #8 +#endif + +.outsw_noalign: ldr r3, [r1, -r3]! + subcs r2, r2, #1 + bcs 2f + subs r2, r2, #2 + bmi 3f + +1: mov ip, r3, lsr #8 + strh ip, [r0] +2: mov ip, r3, pull_hbyte0 + ldr r3, [r1, #4]! + subs r2, r2, #2 + orr ip, ip, r3, push_hbyte1 + strh ip, [r0] + bpl 2b + +3: tst r2, #1 +2: movne ip, r3, lsr #8 + strneh ip, [r0] + mov pc, lr diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S new file mode 100644 index 00000000000..59026029d01 --- /dev/null +++ b/arch/arm/lib/lib1funcs.S @@ -0,0 +1,314 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre <nico@cam.org> + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) + + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + mov pc, lr + +11: moveq r0, #1 + movne r0, #0 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + mov pc, lr + + +ENTRY(__umodsi3) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr + + ARM_MOD_BODY r0, r1, r2, r3 + + mov pc, lr + + +ENTRY(__divsi3) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr + + +ENTRY(__modsi3) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + + +Ldiv0: + + str lr, [sp, #-4]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #4 + + diff --git a/arch/arm/lib/longlong.h b/arch/arm/lib/longlong.h new file mode 100644 index 00000000000..179eea4edc3 --- /dev/null +++ b/arch/arm/lib/longlong.h @@ -0,0 +1,183 @@ +/* longlong.h -- based on code from gcc-2.95.3 + + definitions for mixed size 32/64 bit arithmetic. + Copyright (C) 1991, 92, 94, 95, 96, 1997, 1998 Free Software Foundation, Inc. + + This definition file is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2, or (at your option) any later version. + + This definition file is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Borrowed from GCC 2.95.3, I Molton 29/07/01 */ + +#ifndef SI_TYPE_SIZE +#define SI_TYPE_SIZE 32 +#endif + +#define __BITS4 (SI_TYPE_SIZE / 4) +#define __ll_B (1L << (SI_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((USItype) (t) % __ll_B) +#define __ll_highpart(t) ((USItype) (t) / __ll_B) + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) + multiplies two USItype integers MULTIPLER and MULTIPLICAND, + and generates a two-part USItype product in HIGH_PROD and + LOW_PROD. + + 2) __umulsidi3(a,b) multiplies two USItype integers A and B, + and returns a UDItype product. This is just a variant of umul_ppmm. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a two-word unsigned integer, composed by the + integers HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and + places the quotient in QUOTIENT and the remainder in REMAINDER. + HIGH_NUMERATOR must be less than DENOMINATOR for correct operation. + If, in addition, the most significant bit of DENOMINATOR must be 1, + then the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). Like udiv_qrnnd but the numbers are signed. The + quotient is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from + the msb to the first non-zero bit. This is the number of steps X + needs to be shifted left to set the msb. Undefined for X == 0. + + 6) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two two-word unsigned integers, + composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and + LOW_ADDEND_2 respectively. The result is placed in HIGH_SUM and + LOW_SUM. Overflow (i.e. carry out) is not stored anywhere, and is + lost. + + 7) sub_ddmmss(high_difference, low_difference, high_minuend, + low_minuend, high_subtrahend, low_subtrahend) subtracts two + two-word unsigned integers, composed by HIGH_MINUEND_1 and + LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and LOW_SUBTRAHEND_2 + respectively. The result is placed in HIGH_DIFFERENCE and + LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. */ + +#if defined (__arm__) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5 \n\ + adc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%r" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "%r" ((USItype) (al)), \ + "rI" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5 \n\ + sbc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "r" ((USItype) (al)), \ + "rI" ((USItype) (bl))) +#define umul_ppmm(xh, xl, a, b) \ +{register USItype __t0, __t1, __t2; \ + __asm__ ("%@ Inlined umul_ppmm \n\ + mov %2, %5, lsr #16 \n\ + mov %0, %6, lsr #16 \n\ + bic %3, %5, %2, lsl #16 \n\ + bic %4, %6, %0, lsl #16 \n\ + mul %1, %3, %4 \n\ + mul %4, %2, %4 \n\ + mul %3, %0, %3 \n\ + mul %0, %2, %0 \n\ + adds %3, %4, %3 \n\ + addcs %0, %0, #65536 \n\ + adds %1, %1, %3, lsl #16 \n\ + adc %0, %0, %3, lsr #16" \ + : "=&r" ((USItype) (xh)), \ + "=r" ((USItype) (xl)), \ + "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ + : "r" ((USItype) (a)), \ + "r" ((USItype) (b)));} +#define UMUL_TIME 20 +#define UDIV_TIME 100 +#endif /* __arm__ */ + +#define __umulsidi3(u, v) \ + ({DIunion __w; \ + umul_ppmm (__w.s.high, __w.s.low, u, v); \ + __w.ll; }) + +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + USItype __d1, __d0, __q1, __q0; \ + USItype __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (USItype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (USItype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (USItype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c + +#define count_leading_zeros(count, x) \ + do { \ + USItype __xr = (x); \ + USItype __a; \ + \ + if (SI_TYPE_SIZE <= 32) \ + { \ + __a = __xr < ((USItype)1<<2*__BITS4) \ + ? (__xr < ((USItype)1<<__BITS4) ? 0 : __BITS4) \ + : (__xr < ((USItype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ + } \ + else \ + { \ + for (__a = SI_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = SI_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ + } while (0) diff --git a/arch/arm/lib/lshrdi3.c b/arch/arm/lib/lshrdi3.c new file mode 100644 index 00000000000..b666f1bad45 --- /dev/null +++ b/arch/arm/lib/lshrdi3.c @@ -0,0 +1,61 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +DItype +__lshrdi3 (DItype u, word_type b) +{ + DIunion w; + word_type bm; + DIunion uu; + + if (b == 0) + return u; + + uu.ll = u; + + bm = (sizeof (SItype) * BITS_PER_UNIT) - b; + if (bm <= 0) + { + w.s.high = 0; + w.s.low = (USItype)uu.s.high >> -bm; + } + else + { + USItype carries = (USItype)uu.s.high << bm; + w.s.high = (USItype)uu.s.high >> b; + w.s.low = ((USItype)uu.s.low >> b) | carries; + } + + return w.ll; +} + diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S new file mode 100644 index 00000000000..ac34fe55d21 --- /dev/null +++ b/arch/arm/lib/memchr.S @@ -0,0 +1,25 @@ +/* + * linux/arch/arm/lib/memchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(memchr) +1: subs r2, r2, #1 + bmi 2f + ldrb r3, [r0], #1 + teq r3, r1 + bne 1b + sub r0, r0, #1 +2: movne r0, #0 + RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S new file mode 100644 index 00000000000..f5a593ceb8c --- /dev/null +++ b/arch/arm/lib/memcpy.S @@ -0,0 +1,393 @@ +/* + * linux/arch/arm/lib/memcpy.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +#define ENTER \ + mov ip,sp ;\ + stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\ + sub fp,ip,#4 + +#define EXIT \ + LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc}) + +#define EXITEQ \ + LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc}) + +/* + * Prototype: void memcpy(void *to,const void *from,unsigned long n); + */ +ENTRY(memcpy) +ENTRY(memmove) + ENTER + cmp r1, r0 + bcc 23f + subs r2, r2, #4 + blt 6f + PLD( pld [r1, #0] ) + ands ip, r0, #3 + bne 7f + ands ip, r1, #3 + bne 8f + +1: subs r2, r2, #8 + blt 5f + subs r2, r2, #20 + blt 4f + PLD( pld [r1, #28] ) + PLD( subs r2, r2, #64 ) + PLD( blt 3f ) +2: PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + ldmia r1!, {r3 - r9, ip} + subs r2, r2, #32 + stmgeia r0!, {r3 - r9, ip} + ldmgeia r1!, {r3 - r9, ip} + subges r2, r2, #32 + stmia r0!, {r3 - r9, ip} + bge 2b +3: PLD( ldmia r1!, {r3 - r9, ip} ) + PLD( adds r2, r2, #32 ) + PLD( stmgeia r0!, {r3 - r9, ip} ) + PLD( ldmgeia r1!, {r3 - r9, ip} ) + PLD( subges r2, r2, #32 ) + PLD( stmia r0!, {r3 - r9, ip} ) +4: cmn r2, #16 + ldmgeia r1!, {r3 - r6} + subge r2, r2, #16 + stmgeia r0!, {r3 - r6} + adds r2, r2, #20 + ldmgeia r1!, {r3 - r5} + subge r2, r2, #12 + stmgeia r0!, {r3 - r5} +5: adds r2, r2, #8 + blt 6f + subs r2, r2, #4 + ldrlt r3, [r1], #4 + ldmgeia r1!, {r4, r5} + subge r2, r2, #4 + strlt r3, [r0], #4 + stmgeia r0!, {r4, r5} + +6: adds r2, r2, #4 + EXITEQ + cmp r2, #2 + ldrb r3, [r1], #1 + ldrgeb r4, [r1], #1 + ldrgtb r5, [r1], #1 + strb r3, [r0], #1 + strgeb r4, [r0], #1 + strgtb r5, [r0], #1 + EXIT + +7: rsb ip, ip, #4 + cmp ip, #2 + ldrb r3, [r1], #1 + ldrgeb r4, [r1], #1 + ldrgtb r5, [r1], #1 + strb r3, [r0], #1 + strgeb r4, [r0], #1 + strgtb r5, [r0], #1 + subs r2, r2, ip + blt 6b + ands ip, r1, #3 + beq 1b + +8: bic r1, r1, #3 + ldr r7, [r1], #4 + cmp ip, #2 + bgt 18f + beq 13f + cmp r2, #12 + blt 11f + PLD( pld [r1, #12] ) + sub r2, r2, #12 + PLD( subs r2, r2, #32 ) + PLD( blt 10f ) + PLD( pld [r1, #28] ) +9: PLD( pld [r1, #44] ) +10: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} + subs r2, r2, #16 + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} + bge 9b + PLD( cmn r2, #32 ) + PLD( bge 10b ) + PLD( add r2, r2, #32 ) + adds r2, r2, #12 + blt 12f +11: mov r3, r7, pull #8 + ldr r7, [r1], #4 + subs r2, r2, #4 + orr r3, r3, r7, push #24 + str r3, [r0], #4 + bge 11b +12: sub r1, r1, #3 + b 6b + +13: cmp r2, #12 + blt 16f + PLD( pld [r1, #12] ) + sub r2, r2, #12 + PLD( subs r2, r2, #32 ) + PLD( blt 15f ) + PLD( pld [r1, #28] ) +14: PLD( pld [r1, #44] ) +15: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} + subs r2, r2, #16 + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} + bge 14b + PLD( cmn r2, #32 ) + PLD( bge 15b ) + PLD( add r2, r2, #32 ) + adds r2, r2, #12 + blt 17f +16: mov r3, r7, pull #16 + ldr r7, [r1], #4 + subs r2, r2, #4 + orr r3, r3, r7, push #16 + str r3, [r0], #4 + bge 16b +17: sub r1, r1, #2 + b 6b + +18: cmp r2, #12 + blt 21f + PLD( pld [r1, #12] ) + sub r2, r2, #12 + PLD( subs r2, r2, #32 ) + PLD( blt 20f ) + PLD( pld [r1, #28] ) +19: PLD( pld [r1, #44] ) +20: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} + subs r2, r2, #16 + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} + bge 19b + PLD( cmn r2, #32 ) + PLD( bge 20b ) + PLD( add r2, r2, #32 ) + adds r2, r2, #12 + blt 22f +21: mov r3, r7, pull #24 + ldr r7, [r1], #4 + subs r2, r2, #4 + orr r3, r3, r7, push #8 + str r3, [r0], #4 + bge 21b +22: sub r1, r1, #1 + b 6b + + +23: add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 29f + PLD( pld [r1, #-4] ) + ands ip, r0, #3 + bne 30f + ands ip, r1, #3 + bne 31f + +24: subs r2, r2, #8 + blt 28f + subs r2, r2, #20 + blt 27f + PLD( pld [r1, #-32] ) + PLD( subs r2, r2, #64 ) + PLD( blt 26f ) +25: PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + ldmdb r1!, {r3 - r9, ip} + subs r2, r2, #32 + stmgedb r0!, {r3 - r9, ip} + ldmgedb r1!, {r3 - r9, ip} + subges r2, r2, #32 + stmdb r0!, {r3 - r9, ip} + bge 25b +26: PLD( ldmdb r1!, {r3 - r9, ip} ) + PLD( adds r2, r2, #32 ) + PLD( stmgedb r0!, {r3 - r9, ip} ) + PLD( ldmgedb r1!, {r3 - r9, ip} ) + PLD( subges r2, r2, #32 ) + PLD( stmdb r0!, {r3 - r9, ip} ) +27: cmn r2, #16 + ldmgedb r1!, {r3 - r6} + subge r2, r2, #16 + stmgedb r0!, {r3 - r6} + adds r2, r2, #20 + ldmgedb r1!, {r3 - r5} + subge r2, r2, #12 + stmgedb r0!, {r3 - r5} +28: adds r2, r2, #8 + blt 29f + subs r2, r2, #4 + ldrlt r3, [r1, #-4]! + ldmgedb r1!, {r4, r5} + subge r2, r2, #4 + strlt r3, [r0, #-4]! + stmgedb r0!, {r4, r5} + +29: adds r2, r2, #4 + EXITEQ + cmp r2, #2 + ldrb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrgtb r5, [r1, #-1]! + strb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + strgtb r5, [r0, #-1]! + EXIT + +30: cmp ip, #2 + ldrb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrgtb r5, [r1, #-1]! + strb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + strgtb r5, [r0, #-1]! + subs r2, r2, ip + blt 29b + ands ip, r1, #3 + beq 24b + +31: bic r1, r1, #3 + ldr r3, [r1], #0 + cmp ip, #2 + blt 41f + beq 36f + cmp r2, #12 + blt 34f + PLD( pld [r1, #-16] ) + sub r2, r2, #12 + PLD( subs r2, r2, #32 ) + PLD( blt 33f ) + PLD( pld [r1, #-32] ) +32: PLD( pld [r1, #-48] ) +33: mov r7, r3, push #8 + ldmdb r1!, {r3, r4, r5, r6} + subs r2, r2, #16 + orr r7, r7, r6, pull #24 + mov r6, r6, push #8 + orr r6, r6, r5, pull #24 + mov r5, r5, push #8 + orr r5, r5, r4, pull #24 + mov r4, r4, push #8 + orr r4, r4, r3, pull #24 + stmdb r0!, {r4, r5, r6, r7} + bge 32b + PLD( cmn r2, #32 ) + PLD( bge 33b ) + PLD( add r2, r2, #32 ) + adds r2, r2, #12 + blt 35f +34: mov ip, r3, push #8 + ldr r3, [r1, #-4]! + subs r2, r2, #4 + orr ip, ip, r3, pull #24 + str ip, [r0, #-4]! + bge 34b +35: add r1, r1, #3 + b 29b + +36: cmp r2, #12 + blt 39f + PLD( pld [r1, #-16] ) + sub r2, r2, #12 + PLD( subs r2, r2, #32 ) + PLD( blt 38f ) + PLD( pld [r1, #-32] ) +37: PLD( pld [r1, #-48] ) +38: mov r7, r3, push #16 + ldmdb r1!, {r3, r4, r5, r6} + subs r2, r2, #16 + orr r7, r7, r6, pull #16 + mov r6, r6, push #16 + orr r6, r6, r5, pull #16 + mov r5, r5, push #16 + orr r5, r5, r4, pull #16 + mov r4, r4, push #16 + orr r4, r4, r3, pull #16 + stmdb r0!, {r4, r5, r6, r7} + bge 37b + PLD( cmn r2, #32 ) + PLD( bge 38b ) + PLD( add r2, r2, #32 ) + adds r2, r2, #12 + blt 40f +39: mov ip, r3, push #16 + ldr r3, [r1, #-4]! + subs r2, r2, #4 + orr ip, ip, r3, pull #16 + str ip, [r0, #-4]! + bge 39b +40: add r1, r1, #2 + b 29b + +41: cmp r2, #12 + blt 44f + PLD( pld [r1, #-16] ) + sub r2, r2, #12 + PLD( subs r2, r2, #32 ) + PLD( blt 43f ) + PLD( pld [r1, #-32] ) +42: PLD( pld [r1, #-48] ) +43: mov r7, r3, push #24 + ldmdb r1!, {r3, r4, r5, r6} + subs r2, r2, #16 + orr r7, r7, r6, pull #8 + mov r6, r6, push #24 + orr r6, r6, r5, pull #8 + mov r5, r5, push #24 + orr r5, r5, r4, pull #8 + mov r4, r4, push #24 + orr r4, r4, r3, pull #8 + stmdb r0!, {r4, r5, r6, r7} + bge 42b + PLD( cmn r2, #32 ) + PLD( bge 43b ) + PLD( add r2, r2, #32 ) + adds r2, r2, #12 + blt 45f +44: mov ip, r3, push #24 + ldr r3, [r1, #-4]! + subs r2, r2, #4 + orr ip, ip, r3, pull #8 + str ip, [r0, #-4]! + bge 44b +45: add r1, r1, #1 + b 29b + diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S new file mode 100644 index 00000000000..a1795f59993 --- /dev/null +++ b/arch/arm/lib/memset.S @@ -0,0 +1,80 @@ +/* + * linux/arch/arm/lib/memset.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 + .word 0 + +1: subs r2, r2, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [r0], #1 @ 1 + strleb r1, [r0], #1 @ 1 + strb r1, [r0], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memzero again. + */ + +ENTRY(memset) + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * we know that the pointer in r0 is aligned to a word boundary. + */ + orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 + cmp r2, #16 + blt 4f +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! + mov ip, r1 + mov lr, r1 + +2: subs r2, r2, #64 + stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + bgt 2b + LOADREGS(eqfd, sp!, {pc}) @ Now <64 bytes to go. +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r2, #32 + stmneia r0!, {r1, r3, ip, lr} + stmneia r0!, {r1, r3, ip, lr} + tst r2, #16 + stmneia r0!, {r1, r3, ip, lr} + ldr lr, [sp], #4 + +4: tst r2, #8 + stmneia r0!, {r1, r3} + tst r2, #4 + strne r1, [r0], #4 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r2, #2 + strneb r1, [r0], #1 + strneb r1, [r0], #1 + tst r2, #1 + strneb r1, [r0], #1 + RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S new file mode 100644 index 00000000000..51ccc60160f --- /dev/null +++ b/arch/arm/lib/memzero.S @@ -0,0 +1,80 @@ +/* + * linux/arch/arm/lib/memzero.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 + .word 0 +/* + * Align the pointer in r0. r3 contains the number of bytes that we are + * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we + * don't bother; we use byte stores instead. + */ +1: subs r1, r1, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r2, [r0], #1 @ 1 + strleb r2, [r0], #1 @ 1 + strb r2, [r0], #1 @ 1 + add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memzero again. + */ + +ENTRY(__memzero) + mov r2, #0 @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. + */ + cmp r1, #16 @ 1 we can skip this chunk if we + blt 4f @ 1 have < 16 bytes +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! @ 1 + mov ip, r2 @ 1 + mov lr, r2 @ 1 + +3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + bgt 3b @ 1 + LOADREGS(eqfd, sp!, {pc}) @ 1/2 quick exit +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r1, #32 @ 1 + stmneia r0!, {r2, r3, ip, lr} @ 4 + stmneia r0!, {r2, r3, ip, lr} @ 4 + tst r1, #16 @ 1 16 bytes or more? + stmneia r0!, {r2, r3, ip, lr} @ 4 + ldr lr, [sp], #4 @ 1 + +4: tst r1, #8 @ 1 8 bytes or more? + stmneia r0!, {r2, r3} @ 2 + tst r1, #4 @ 1 4 bytes or more? + strne r2, [r0], #4 @ 1 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r1, #2 @ 1 2 bytes or more? + strneb r2, [r0], #1 @ 1 + strneb r2, [r0], #1 @ 1 + tst r1, #1 @ 1 a byte left over + strneb r2, [r0], #1 @ 1 + RETINSTR(mov,pc,lr) @ 1 diff --git a/arch/arm/lib/muldi3.c b/arch/arm/lib/muldi3.c new file mode 100644 index 00000000000..44d611b1cfd --- /dev/null +++ b/arch/arm/lib/muldi3.c @@ -0,0 +1,77 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +#define umul_ppmm(xh, xl, a, b) \ +{register USItype __t0, __t1, __t2; \ + __asm__ ("%@ Inlined umul_ppmm \n\ + mov %2, %5, lsr #16 \n\ + mov %0, %6, lsr #16 \n\ + bic %3, %5, %2, lsl #16 \n\ + bic %4, %6, %0, lsl #16 \n\ + mul %1, %3, %4 \n\ + mul %4, %2, %4 \n\ + mul %3, %0, %3 \n\ + mul %0, %2, %0 \n\ + adds %3, %4, %3 \n\ + addcs %0, %0, #65536 \n\ + adds %1, %1, %3, lsl #16 \n\ + adc %0, %0, %3, lsr #16" \ + : "=&r" ((USItype) (xh)), \ + "=r" ((USItype) (xl)), \ + "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ + : "r" ((USItype) (a)), \ + "r" ((USItype) (b)));} + + +#define __umulsidi3(u, v) \ + ({DIunion __w; \ + umul_ppmm (__w.s.high, __w.s.low, u, v); \ + __w.ll; }) + + +DItype +__muldi3 (DItype u, DItype v) +{ + DIunion w; + DIunion uu, vv; + + uu.ll = u, + vv.ll = v; + + w.ll = __umulsidi3 (uu.s.low, vv.s.low); + w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high + + (USItype) uu.s.high * (USItype) vv.s.low); + + return w.ll; +} + diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S new file mode 100644 index 00000000000..b09398d95aa --- /dev/null +++ b/arch/arm/lib/putuser.S @@ -0,0 +1,76 @@ +/* + * linux/arch/arm/lib/putuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make + * them more efficient, especially as they return an error + * value in addition to the "real" return value. + * + * __put_user_X + * + * Inputs: r0 contains the address + * r2, r3 contains the value + * Outputs: r0 is the error code + * lr corrupted + * + * No other registers must be altered. (see include/asm-arm/uaccess.h + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000 + * Note also that it is intended that __put_user_bad is not global. + */ +#include <asm/constants.h> +#include <asm/thread_info.h> +#include <asm/errno.h> + + .global __put_user_1 +__put_user_1: +1: strbt r2, [r0] + mov r0, #0 + mov pc, lr + + .global __put_user_2 +__put_user_2: + mov ip, r2, lsr #8 +#ifndef __ARMEB__ +2: strbt r2, [r0], #1 +3: strbt ip, [r0] +#else +2: strbt ip, [r0], #1 +3: strbt r2, [r0] +#endif + mov r0, #0 + mov pc, lr + + .global __put_user_4 +__put_user_4: +4: strt r2, [r0] + mov r0, #0 + mov pc, lr + + .global __put_user_8 +__put_user_8: +5: strt r2, [r0], #4 +6: strt r3, [r0] + mov r0, #0 + mov pc, lr + +__put_user_bad: + mov r0, #-EFAULT + mov pc, lr + +.section __ex_table, "a" + .long 1b, __put_user_bad + .long 2b, __put_user_bad + .long 3b, __put_user_bad + .long 4b, __put_user_bad + .long 5b, __put_user_bad + .long 6b, __put_user_bad +.previous diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S new file mode 100644 index 00000000000..8f337df5d99 --- /dev/null +++ b/arch/arm/lib/setbit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm/lib/setbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Function to set a bit + * Prototype: int set_bit(int bit, void *addr) + */ +ENTRY(_set_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_set_bit_le) + and r2, r0, #7 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip, r2 + ldrb r2, [r1, r0, lsr #3] + orr r2, r2, r3 + strb r2, [r1, r0, lsr #3] + restore_irqs ip + RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S new file mode 100644 index 00000000000..5b9b493733f --- /dev/null +++ b/arch/arm/lib/strchr.S @@ -0,0 +1,26 @@ +/* + * linux/arch/arm/lib/strchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strchr) + and r1, r1, #0xff +1: ldrb r2, [r0], #1 + teq r2, r1 + teqne r2, #0 + bne 1b + teq r2, r1 + movne r0, #0 + subeq r0, r0, #1 + RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S new file mode 100644 index 00000000000..629cc877527 --- /dev/null +++ b/arch/arm/lib/strncpy_from_user.S @@ -0,0 +1,43 @@ +/* + * linux/arch/arm/lib/strncpy_from_user.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> + + .text + .align 5 + +/* + * Copy a string from user space to kernel space. + * r0 = dst, r1 = src, r2 = byte length + * returns the number of characters copied (strlen of copied string), + * -EFAULT on exception, or "len" if we fill the whole buffer + */ +ENTRY(__arch_strncpy_from_user) + save_lr + mov ip, r1 +1: subs r2, r2, #1 +USER( ldrplbt r3, [r1], #1) + bmi 2f + strb r3, [r0], #1 + teq r3, #0 + bne 1b + sub r1, r1, #1 @ take NUL character out of count +2: sub r0, r1, ip + restore_pc + + .section .fixup,"ax" + .align 0 +9001: mov r3, #0 + strb r3, [r0, #0] @ null terminate + mov r0, #-EFAULT + restore_pc + .previous + diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S new file mode 100644 index 00000000000..67bcd826812 --- /dev/null +++ b/arch/arm/lib/strnlen_user.S @@ -0,0 +1,40 @@ +/* + * linux/arch/arm/lib/strnlen_user.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> + + .text + .align 5 + +/* Prototype: unsigned long __arch_strnlen_user(const char *str, long n) + * Purpose : get length of a string in user memory + * Params : str - address of string in user memory + * Returns : length of string *including terminator* + * or zero on exception, or n + 1 if too long + */ +ENTRY(__arch_strnlen_user) + save_lr + mov r2, r0 +1: +USER( ldrbt r3, [r0], #1) + teq r3, #0 + beq 2f + subs r1, r1, #1 + bne 1b + add r0, r0, #1 +2: sub r0, r0, r2 + restore_pc + + .section .fixup,"ax" + .align 0 +9001: mov r0, #0 + restore_pc + .previous diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S new file mode 100644 index 00000000000..fa923f026f1 --- /dev/null +++ b/arch/arm/lib/strrchr.S @@ -0,0 +1,25 @@ +/* + * linux/arch/arm/lib/strrchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strrchr) + mov r3, #0 +1: ldrb r2, [r0], #1 + teq r2, r1 + subeq r3, r0, #1 + teq r2, #0 + bne 1b + mov r0, r3 + RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S new file mode 100644 index 00000000000..4aba4676b98 --- /dev/null +++ b/arch/arm/lib/testchangebit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm/lib/testchangebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +ENTRY(_test_and_change_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_test_and_change_bit_le) + add r1, r1, r0, lsr #3 + and r3, r0, #7 + mov r0, #1 + save_and_disable_irqs ip, r2 + ldrb r2, [r1] + tst r2, r0, lsl r3 + eor r2, r2, r0, lsl r3 + strb r2, [r1] + restore_irqs ip + moveq r0, #0 + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S new file mode 100644 index 00000000000..e07c5bd2430 --- /dev/null +++ b/arch/arm/lib/testclearbit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm/lib/testclearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +ENTRY(_test_and_clear_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_test_and_clear_bit_le) + add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #7 @ Get bit offset + mov r0, #1 + save_and_disable_irqs ip, r2 + ldrb r2, [r1] + tst r2, r0, lsl r3 + bic r2, r2, r0, lsl r3 + strb r2, [r1] + restore_irqs ip + moveq r0, #0 + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S new file mode 100644 index 00000000000..a570fc74cdd --- /dev/null +++ b/arch/arm/lib/testsetbit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm/lib/testsetbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +ENTRY(_test_and_set_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_test_and_set_bit_le) + add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #7 @ Get bit offset + mov r0, #1 + save_and_disable_irqs ip, r2 + ldrb r2, [r1] + tst r2, r0, lsl r3 + orr r2, r2, r0, lsl r3 + strb r2, [r1] + restore_irqs ip + moveq r0, #0 + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S new file mode 100644 index 00000000000..d3ed0636c00 --- /dev/null +++ b/arch/arm/lib/uaccess.S @@ -0,0 +1,697 @@ +/* + * linux/arch/arm/lib/uaccess.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Routines to block copy data to/from user memory + * These are highly optimised both for the 4k page size + * and for various alignments. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> + + .text + +#define PAGE_SHIFT 12 + +/* Prototype: int __arch_copy_to_user(void *to, const char *from, size_t n) + * Purpose : copy a block to user memory from kernel memory + * Params : to - user memory + * : from - kernel memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. + */ + +.c2u_dest_not_aligned: + rsb ip, ip, #4 + cmp ip, #2 + ldrb r3, [r1], #1 +USER( strbt r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( strgtbt r3, [r0], #1) @ May fault + sub r2, r2, ip + b .c2u_dest_aligned + +ENTRY(__arch_copy_to_user) + stmfd sp!, {r2, r4 - r7, lr} + cmp r2, #4 + blt .c2u_not_enough + PLD( pld [r1, #0] ) + PLD( pld [r0, #0] ) + ands ip, r0, #3 + bne .c2u_dest_not_aligned +.c2u_dest_aligned: + + ands ip, r1, #3 + bne .c2u_src_not_aligned +/* + * Seeing as there has to be at least 8 bytes to copy, we can + * copy one word, and force a user-mode page fault... + */ + +.c2u_0fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_0nowords + ldr r3, [r1], #4 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_0fupi +/* + * ip = max no. of bytes to copy before needing another "strt" insn + */ + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #32 + blt .c2u_0rem8lp + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + PLD( subs ip, ip, #64 ) + PLD( blt .c2u_0cpynopld ) + PLD( pld [r1, #60] ) + PLD( pld [r0, #60] ) + +.c2u_0cpy8lp: + PLD( pld [r1, #92] ) + PLD( pld [r0, #92] ) +.c2u_0cpynopld: ldmia r1!, {r3 - r6} + stmia r0!, {r3 - r6} @ Shouldnt fault + ldmia r1!, {r3 - r6} + subs ip, ip, #32 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .c2u_0cpy8lp + PLD( cmn ip, #64 ) + PLD( bge .c2u_0cpynopld ) + PLD( add ip, ip, #64 ) + +.c2u_0rem8lp: cmn ip, #16 + ldmgeia r1!, {r3 - r6} + stmgeia r0!, {r3 - r6} @ Shouldnt fault + tst ip, #8 + ldmneia r1!, {r3 - r4} + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + ldrne r3, [r1], #4 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_0fupi +.c2u_0nowords: teq ip, #0 + beq .c2u_finished +.c2u_nowords: cmp ip, #2 + ldrb r3, [r1], #1 +USER( strbt r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + +.c2u_not_enough: + movs ip, r2 + bne .c2u_nowords +.c2u_finished: mov r0, #0 + LOADREGS(fd,sp!,{r2, r4 - r7, pc}) + +.c2u_src_not_aligned: + bic r1, r1, #3 + ldr r7, [r1], #4 + cmp ip, #2 + bgt .c2u_3fupi + beq .c2u_2fupi +.c2u_1fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_1nowords + mov r3, r7, pull #8 + ldr r7, [r1], #4 + orr r3, r3, r7, push #24 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_1fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .c2u_1rem8lp + PLD( pld [r1, #12] ) + PLD( pld [r0, #12] ) + PLD( subs ip, ip, #32 ) + PLD( blt .c2u_1cpynopld ) + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + +.c2u_1cpy8lp: + PLD( pld [r1, #44] ) + PLD( pld [r0, #44] ) +.c2u_1cpynopld: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} + subs ip, ip, #16 + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .c2u_1cpy8lp + PLD( cmn ip, #32 ) + PLD( bge .c2u_1cpynopld ) + PLD( add ip, ip, #32 ) + +.c2u_1rem8lp: tst ip, #8 + movne r3, r7, pull #8 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #24 + movne r4, r4, pull #8 + orrne r4, r4, r7, push #24 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #8 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #24 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_1fupi +.c2u_1nowords: mov r3, r7, get_byte_1 + teq ip, #0 + beq .c2u_finished + cmp ip, #2 +USER( strbt r3, [r0], #1) @ May fault + movge r3, r7, get_byte_2 +USER( strgebt r3, [r0], #1) @ May fault + movgt r3, r7, get_byte_3 +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + +.c2u_2fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_2nowords + mov r3, r7, pull #16 + ldr r7, [r1], #4 + orr r3, r3, r7, push #16 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_2fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .c2u_2rem8lp + PLD( pld [r1, #12] ) + PLD( pld [r0, #12] ) + PLD( subs ip, ip, #32 ) + PLD( blt .c2u_2cpynopld ) + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + +.c2u_2cpy8lp: + PLD( pld [r1, #44] ) + PLD( pld [r0, #44] ) +.c2u_2cpynopld: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} + subs ip, ip, #16 + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .c2u_2cpy8lp + PLD( cmn ip, #32 ) + PLD( bge .c2u_2cpynopld ) + PLD( add ip, ip, #32 ) + +.c2u_2rem8lp: tst ip, #8 + movne r3, r7, pull #16 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #16 + movne r4, r4, pull #16 + orrne r4, r4, r7, push #16 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #16 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #16 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_2fupi +.c2u_2nowords: mov r3, r7, get_byte_2 + teq ip, #0 + beq .c2u_finished + cmp ip, #2 +USER( strbt r3, [r0], #1) @ May fault + movge r3, r7, get_byte_3 +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #0 +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + +.c2u_3fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_3nowords + mov r3, r7, pull #24 + ldr r7, [r1], #4 + orr r3, r3, r7, push #8 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_3fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .c2u_3rem8lp + PLD( pld [r1, #12] ) + PLD( pld [r0, #12] ) + PLD( subs ip, ip, #32 ) + PLD( blt .c2u_3cpynopld ) + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + +.c2u_3cpy8lp: + PLD( pld [r1, #44] ) + PLD( pld [r0, #44] ) +.c2u_3cpynopld: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} + subs ip, ip, #16 + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .c2u_3cpy8lp + PLD( cmn ip, #32 ) + PLD( bge .c2u_3cpynopld ) + PLD( add ip, ip, #32 ) + +.c2u_3rem8lp: tst ip, #8 + movne r3, r7, pull #24 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #8 + movne r4, r4, pull #24 + orrne r4, r4, r7, push #8 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #24 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #8 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_3fupi +.c2u_3nowords: mov r3, r7, get_byte_3 + teq ip, #0 + beq .c2u_finished + cmp ip, #2 +USER( strbt r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #0 +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + + .section .fixup,"ax" + .align 0 +9001: LOADREGS(fd,sp!, {r0, r4 - r7, pc}) + .previous + +/* Prototype: unsigned long __arch_copy_from_user(void *to,const void *from,unsigned long n); + * Purpose : copy a block from user memory to kernel memory + * Params : to - kernel memory + * : from - user memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. + */ +.cfu_dest_not_aligned: + rsb ip, ip, #4 + cmp ip, #2 +USER( ldrbt r3, [r1], #1) @ May fault + strb r3, [r0], #1 +USER( ldrgebt r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + sub r2, r2, ip + b .cfu_dest_aligned + +ENTRY(__arch_copy_from_user) + stmfd sp!, {r0, r2, r4 - r7, lr} + cmp r2, #4 + blt .cfu_not_enough + PLD( pld [r1, #0] ) + PLD( pld [r0, #0] ) + ands ip, r0, #3 + bne .cfu_dest_not_aligned +.cfu_dest_aligned: + ands ip, r1, #3 + bne .cfu_src_not_aligned +/* + * Seeing as there has to be at least 8 bytes to copy, we can + * copy one word, and force a user-mode page fault... + */ + +.cfu_0fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_0nowords +USER( ldrt r3, [r1], #4) + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_0fupi +/* + * ip = max no. of bytes to copy before needing another "strt" insn + */ + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #32 + blt .cfu_0rem8lp + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + PLD( subs ip, ip, #64 ) + PLD( blt .cfu_0cpynopld ) + PLD( pld [r1, #60] ) + PLD( pld [r0, #60] ) + +.cfu_0cpy8lp: + PLD( pld [r1, #92] ) + PLD( pld [r0, #92] ) +.cfu_0cpynopld: ldmia r1!, {r3 - r6} @ Shouldnt fault + stmia r0!, {r3 - r6} + ldmia r1!, {r3 - r6} @ Shouldnt fault + subs ip, ip, #32 + stmia r0!, {r3 - r6} + bpl .cfu_0cpy8lp + PLD( cmn ip, #64 ) + PLD( bge .cfu_0cpynopld ) + PLD( add ip, ip, #64 ) + +.cfu_0rem8lp: cmn ip, #16 + ldmgeia r1!, {r3 - r6} @ Shouldnt fault + stmgeia r0!, {r3 - r6} + tst ip, #8 + ldmneia r1!, {r3 - r4} @ Shouldnt fault + stmneia r0!, {r3 - r4} + tst ip, #4 + ldrnet r3, [r1], #4 @ Shouldnt fault + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_0fupi +.cfu_0nowords: teq ip, #0 + beq .cfu_finished +.cfu_nowords: cmp ip, #2 +USER( ldrbt r3, [r1], #1) @ May fault + strb r3, [r0], #1 +USER( ldrgebt r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + b .cfu_finished + +.cfu_not_enough: + movs ip, r2 + bne .cfu_nowords +.cfu_finished: mov r0, #0 + add sp, sp, #8 + LOADREGS(fd,sp!,{r4 - r7, pc}) + +.cfu_src_not_aligned: + bic r1, r1, #3 +USER( ldrt r7, [r1], #4) @ May fault + cmp ip, #2 + bgt .cfu_3fupi + beq .cfu_2fupi +.cfu_1fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_1nowords + mov r3, r7, pull #8 +USER( ldrt r7, [r1], #4) @ May fault + orr r3, r3, r7, push #24 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_1fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .cfu_1rem8lp + PLD( pld [r1, #12] ) + PLD( pld [r0, #12] ) + PLD( subs ip, ip, #32 ) + PLD( blt .cfu_1cpynopld ) + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + +.cfu_1cpy8lp: + PLD( pld [r1, #44] ) + PLD( pld [r0, #44] ) +.cfu_1cpynopld: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} @ Shouldnt fault + subs ip, ip, #16 + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} + bpl .cfu_1cpy8lp + PLD( cmn ip, #32 ) + PLD( bge .cfu_1cpynopld ) + PLD( add ip, ip, #32 ) + +.cfu_1rem8lp: tst ip, #8 + movne r3, r7, pull #8 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #24 + movne r4, r4, pull #8 + orrne r4, r4, r7, push #24 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #8 +USER( ldrnet r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #24 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_1fupi +.cfu_1nowords: mov r3, r7, get_byte_1 + teq ip, #0 + beq .cfu_finished + cmp ip, #2 + strb r3, [r0], #1 + movge r3, r7, get_byte_2 + strgeb r3, [r0], #1 + movgt r3, r7, get_byte_3 + strgtb r3, [r0], #1 + b .cfu_finished + +.cfu_2fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_2nowords + mov r3, r7, pull #16 +USER( ldrt r7, [r1], #4) @ May fault + orr r3, r3, r7, push #16 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_2fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .cfu_2rem8lp + PLD( pld [r1, #12] ) + PLD( pld [r0, #12] ) + PLD( subs ip, ip, #32 ) + PLD( blt .cfu_2cpynopld ) + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + +.cfu_2cpy8lp: + PLD( pld [r1, #44] ) + PLD( pld [r0, #44] ) +.cfu_2cpynopld: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} @ Shouldnt fault + subs ip, ip, #16 + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} + bpl .cfu_2cpy8lp + PLD( cmn ip, #32 ) + PLD( bge .cfu_2cpynopld ) + PLD( add ip, ip, #32 ) + +.cfu_2rem8lp: tst ip, #8 + movne r3, r7, pull #16 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #16 + movne r4, r4, pull #16 + orrne r4, r4, r7, push #16 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #16 +USER( ldrnet r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #16 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_2fupi +.cfu_2nowords: mov r3, r7, get_byte_2 + teq ip, #0 + beq .cfu_finished + cmp ip, #2 + strb r3, [r0], #1 + movge r3, r7, get_byte_3 + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #0) @ May fault + strgtb r3, [r0], #1 + b .cfu_finished + +.cfu_3fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_3nowords + mov r3, r7, pull #24 +USER( ldrt r7, [r1], #4) @ May fault + orr r3, r3, r7, push #8 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_3fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .cfu_3rem8lp + PLD( pld [r1, #12] ) + PLD( pld [r0, #12] ) + PLD( subs ip, ip, #32 ) + PLD( blt .cfu_3cpynopld ) + PLD( pld [r1, #28] ) + PLD( pld [r0, #28] ) + +.cfu_3cpy8lp: + PLD( pld [r1, #44] ) + PLD( pld [r0, #44] ) +.cfu_3cpynopld: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} @ Shouldnt fault + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} + subs ip, ip, #16 + bpl .cfu_3cpy8lp + PLD( cmn ip, #32 ) + PLD( bge .cfu_3cpynopld ) + PLD( add ip, ip, #32 ) + +.cfu_3rem8lp: tst ip, #8 + movne r3, r7, pull #24 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #8 + movne r4, r4, pull #24 + orrne r4, r4, r7, push #8 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #24 +USER( ldrnet r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #8 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_3fupi +.cfu_3nowords: mov r3, r7, get_byte_3 + teq ip, #0 + beq .cfu_finished + cmp ip, #2 + strb r3, [r0], #1 +USER( ldrgebt r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + b .cfu_finished + + .section .fixup,"ax" + .align 0 + /* + * We took an exception. r0 contains a pointer to + * the byte not copied. + */ +9001: ldr r2, [sp], #4 @ void *to + sub r2, r0, r2 @ bytes copied + ldr r1, [sp], #4 @ unsigned long count + subs r4, r1, r2 @ bytes left to copy + movne r1, r4 + blne __memzero + mov r0, r4 + LOADREGS(fd,sp!, {r4 - r7, pc}) + .previous + +/* Prototype: int __arch_clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + */ +ENTRY(__arch_clear_user) + stmfd sp!, {r1, lr} + mov r2, #0 + cmp r1, #4 + blt 2f + ands ip, r0, #3 + beq 1f + cmp ip, #2 +USER( strbt r2, [r0], #1) +USER( strlebt r2, [r0], #1) +USER( strltbt r2, [r0], #1) + rsb ip, ip, #4 + sub r1, r1, ip @ 7 6 5 4 3 2 1 +1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 +USER( strplt r2, [r0], #4) +USER( strplt r2, [r0], #4) + bpl 1b + adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 +USER( strplt r2, [r0], #4) +2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x +USER( strnebt r2, [r0], #1) +USER( strnebt r2, [r0], #1) + tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 +USER( strnebt r2, [r0], #1) + mov r0, #0 + LOADREGS(fd,sp!, {r1, pc}) + + .section .fixup,"ax" + .align 0 +9001: LOADREGS(fd,sp!, {r0, pc}) + .previous + diff --git a/arch/arm/lib/ucmpdi2.c b/arch/arm/lib/ucmpdi2.c new file mode 100644 index 00000000000..6c6ae63efa0 --- /dev/null +++ b/arch/arm/lib/ucmpdi2.c @@ -0,0 +1,51 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +word_type +__ucmpdi2 (DItype a, DItype b) +{ + DIunion au, bu; + + au.ll = a, bu.ll = b; + + if ((USItype) au.s.high < (USItype) bu.s.high) + return 0; + else if ((USItype) au.s.high > (USItype) bu.s.high) + return 2; + if ((USItype) au.s.low < (USItype) bu.s.low) + return 0; + else if ((USItype) au.s.low > (USItype) bu.s.low) + return 2; + return 1; +} + diff --git a/arch/arm/lib/udivdi3.c b/arch/arm/lib/udivdi3.c new file mode 100644 index 00000000000..d25195f673f --- /dev/null +++ b/arch/arm/lib/udivdi3.c @@ -0,0 +1,242 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" +#include "longlong.h" + +static const UQItype __clz_tab[] = +{ + 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +}; + +UDItype +__udivmoddi4 (UDItype n, UDItype d, UDItype *rp) +{ + DIunion ww; + DIunion nn, dd; + DIunion rr; + USItype d0, d1, n0, n1, n2; + USItype q0, q1; + USItype b, bm; + + nn.ll = n; + dd.ll = d; + + d0 = dd.s.low; + d1 = dd.s.high; + n0 = nn.s.low; + n1 = nn.s.high; + + if (d1 == 0) + { + if (d0 > n1) + { + /* 0q = nn / 0D */ + + count_leading_zeros (bm, d0); + + if (bm != 0) + { + /* Normalize, i.e. make the most significant bit of the + denominator set. */ + + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (SI_TYPE_SIZE - bm)); + n0 = n0 << bm; + } + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0 >> bm. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + count_leading_zeros (bm, d0); + + if (bm == 0) + { + /* From (n1 >= d0) /\ (the most significant bit of d0 is set), + conclude (the most significant bit of n1 is set) /\ (the + leading quotient digit q1 = 1). + + This special case is necessary, not an optimization. + (Shifts counts of SI_TYPE_SIZE are undefined.) */ + + n1 -= d0; + q1 = 1; + } + else + { + /* Normalize. */ + + b = SI_TYPE_SIZE - bm; + + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q1, n1, n2, n1, d0); + } + + /* n1 != d0... */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0 >> bm. */ + } + + if (rp != 0) + { + rr.s.low = n0 >> bm; + rr.s.high = 0; + *rp = rr.ll; + } + } + else + { + if (d1 > n1) + { + /* 00 = nn / DD */ + + q0 = 0; + q1 = 0; + + /* Remainder in n1n0. */ + if (rp != 0) + { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else + { + /* 0q = NN / dd */ + + count_leading_zeros (bm, d1); + if (bm == 0) + { + /* From (n1 >= d1) /\ (the most significant bit of d1 is set), + conclude (the most significant bit of n1 is set) /\ (the + quotient digit q0 = 0 or 1). + + This special case is necessary, not an optimization. */ + + /* The condition on the next line takes advantage of that + n1 >= d1 (true due to program flow). */ + if (n1 > d1 || n0 >= d0) + { + q0 = 1; + sub_ddmmss (n1, n0, n1, n0, d1, d0); + } + else + q0 = 0; + + q1 = 0; + + if (rp != 0) + { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else + { + USItype m1, m0; + /* Normalize. */ + + b = SI_TYPE_SIZE - bm; + + d1 = (d1 << bm) | (d0 >> b); + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q0, n1, n2, n1, d1); + umul_ppmm (m1, m0, q0, d0); + + if (m1 > n1 || (m1 == n1 && m0 > n0)) + { + q0--; + sub_ddmmss (m1, m0, m1, m0, d1, d0); + } + + q1 = 0; + + /* Remainder in (n1n0 - m1m0) >> bm. */ + if (rp != 0) + { + sub_ddmmss (n1, n0, n1, n0, m1, m0); + rr.s.low = (n1 << b) | (n0 >> bm); + rr.s.high = n1 >> bm; + *rp = rr.ll; + } + } + } + } + + ww.s.low = q0; + ww.s.high = q1; + return ww.ll; +} + +UDItype +__udivdi3 (UDItype n, UDItype d) +{ + return __udivmoddi4 (n, d, (UDItype *) 0); +} + +UDItype +__umoddi3 (UDItype u, UDItype v) +{ + UDItype w; + + (void) __udivmoddi4 (u ,v, &w); + + return w; +} + |