diff options
Diffstat (limited to 'arch/c6x/lib/csum_64plus.S')
-rw-r--r-- | arch/c6x/lib/csum_64plus.S | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S new file mode 100644 index 00000000000..6d258964722 --- /dev/null +++ b/arch/c6x/lib/csum_64plus.S @@ -0,0 +1,419 @@ +; +; linux/arch/c6x/lib/csum_64plus.s +; +; Port on Texas Instruments TMS320C6x architecture +; +; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated +; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) +; +; This program is free software; you can redistribute it and/or modify +; it under the terms of the GNU General Public License version 2 as +; published by the Free Software Foundation. +; +#include <linux/linkage.h> + +; +;unsigned int csum_partial_copy(const char *src, char * dst, +; int len, int sum) +; +; A4: src +; B4: dst +; A6: len +; B6: sum +; return csum in A4 +; + + .text +ENTRY(csum_partial_copy) + MVC .S2 ILC,B30 + + MV .D1X B6,A31 ; given csum + ZERO .D1 A9 ; csum (a side) +|| ZERO .D2 B9 ; csum (b side) +|| SHRU .S2X A6,2,B5 ; len / 4 + + ;; Check alignment and size + AND .S1 3,A4,A1 +|| AND .S2 3,B4,B0 + OR .L2X B0,A1,B0 ; non aligned condition +|| MVC .S2 B5,ILC +|| MVK .D2 1,B2 +|| MV .D1X B5,A1 ; words condition + [!A1] B .S1 L8 + [B0] BNOP .S1 L6,5 + + SPLOOP 1 + + ;; Main loop for aligned words + LDW .D1T1 *A4++,A7 + NOP 4 + MV .S2X A7,B7 +|| EXTU .S1 A7,0,16,A16 + STW .D2T2 B7,*B4++ +|| MPYU .M2 B7,B2,B8 +|| ADD .L1 A16,A9,A9 + NOP + SPKERNEL 8,0 +|| ADD .L2 B8,B9,B9 + + ZERO .D1 A1 +|| ADD .L1X A9,B9,A9 ; add csum from a and b sides + +L6: + [!A1] BNOP .S1 L8,5 + + ;; Main loop for non-aligned words + SPLOOP 2 + || MVK .L1 1,A2 + + LDNW .D1T1 *A4++,A7 + NOP 3 + + NOP + MV .S2X A7,B7 + || EXTU .S1 A7,0,16,A16 + || MPYU .M1 A7,A2,A8 + + ADD .L1 A16,A9,A9 + SPKERNEL 6,0 + || STNW .D2T2 B7,*B4++ + || ADD .L1 A8,A9,A9 + +L8: AND .S2X 2,A6,B5 + CMPGT .L2 B5,0,B0 + [!B0] BNOP .S1 L82,4 + + ;; Manage half-word + ZERO .L1 A7 +|| ZERO .D1 A8 + +#ifdef CONFIG_CPU_BIG_ENDIAN + + LDBU .D1T1 *A4++,A7 + LDBU .D1T1 *A4++,A8 + NOP 3 + SHL .S1 A7,8,A0 + ADD .S1 A8,A9,A9 + STB .D2T1 A7,*B4++ +|| ADD .S1 A0,A9,A9 + STB .D2T1 A8,*B4++ + +#else + + LDBU .D1T1 *A4++,A7 + LDBU .D1T1 *A4++,A8 + NOP 3 + ADD .S1 A7,A9,A9 + SHL .S1 A8,8,A0 + + STB .D2T1 A7,*B4++ +|| ADD .S1 A0,A9,A9 + STB .D2T1 A8,*B4++ + +#endif + + ;; Manage eventually the last byte +L82: AND .S2X 1,A6,B0 + [!B0] BNOP .S1 L9,5 + +|| ZERO .L1 A7 + +L83: LDBU .D1T1 *A4++,A7 + NOP 4 + + MV .L2X A7,B7 + +#ifdef CONFIG_CPU_BIG_ENDIAN + + STB .D2T2 B7,*B4++ +|| SHL .S1 A7,8,A7 + ADD .S1 A7,A9,A9 + +#else + + STB .D2T2 B7,*B4++ +|| ADD .S1 A7,A9,A9 + +#endif + + ;; Fold the csum +L9: SHRU .S2X A9,16,B0 + [!B0] BNOP .S1 L10,5 + +L91: SHRU .S2X A9,16,B4 +|| EXTU .S1 A9,16,16,A3 + ADD .D1X A3,B4,A9 + + SHRU .S1 A9,16,A0 + [A0] BNOP .S1 L91,5 + +L10: ADD .D1 A31,A9,A9 + MV .D1 A9,A4 + + BNOP .S2 B3,4 + MVC .S2 B30,ILC +ENDPROC(csum_partial_copy) + +; +;unsigned short +;ip_fast_csum(unsigned char *iph, unsigned int ihl) +;{ +; unsigned int checksum = 0; +; unsigned short *tosum = (unsigned short *) iph; +; int len; +; +; len = ihl*4; +; +; if (len <= 0) +; return 0; +; +; while(len) { +; len -= 2; +; checksum += *tosum++; +; } +; if (len & 1) +; checksum += *(unsigned char*) tosum; +; +; while(checksum >> 16) +; checksum = (checksum & 0xffff) + (checksum >> 16); +; +; return ~checksum; +;} +; +; A4: iph +; B4: ihl +; return checksum in A4 +; + .text + +ENTRY(ip_fast_csum) + ZERO .D1 A5 + || MVC .S2 ILC,B30 + SHL .S2 B4,2,B0 + CMPGT .L2 B0,0,B1 + [!B1] BNOP .S1 L15,4 + [!B1] ZERO .D1 A3 + + [!B0] B .S1 L12 + SHRU .S2 B0,1,B0 + MVC .S2 B0,ILC + NOP 3 + + SPLOOP 1 + LDHU .D1T1 *A4++,A3 + NOP 3 + NOP + SPKERNEL 5,0 + || ADD .L1 A3,A5,A5 + +L12: SHRU .S1 A5,16,A0 + [!A0] BNOP .S1 L14,5 + +L13: SHRU .S2X A5,16,B4 + EXTU .S1 A5,16,16,A3 + ADD .D1X A3,B4,A5 + SHRU .S1 A5,16,A0 + [A0] BNOP .S1 L13,5 + +L14: NOT .D1 A5,A3 + EXTU .S1 A3,16,16,A3 + +L15: BNOP .S2 B3,3 + MVC .S2 B30,ILC + MV .D1 A3,A4 +ENDPROC(ip_fast_csum) + +; +;unsigned short +;do_csum(unsigned char *buff, unsigned int len) +;{ +; int odd, count; +; unsigned int result = 0; +; +; if (len <= 0) +; goto out; +; odd = 1 & (unsigned long) buff; +; if (odd) { +;#ifdef __LITTLE_ENDIAN +; result += (*buff << 8); +;#else +; result = *buff; +;#endif +; len--; +; buff++; +; } +; count = len >> 1; /* nr of 16-bit words.. */ +; if (count) { +; if (2 & (unsigned long) buff) { +; result += *(unsigned short *) buff; +; count--; +; len -= 2; +; buff += 2; +; } +; count >>= 1; /* nr of 32-bit words.. */ +; if (count) { +; unsigned int carry = 0; +; do { +; unsigned int w = *(unsigned int *) buff; +; count--; +; buff += 4; +; result += carry; +; result += w; +; carry = (w > result); +; } while (count); +; result += carry; +; result = (result & 0xffff) + (result >> 16); +; } +; if (len & 2) { +; result += *(unsigned short *) buff; +; buff += 2; +; } +; } +; if (len & 1) +;#ifdef __LITTLE_ENDIAN +; result += *buff; +;#else +; result += (*buff << 8); +;#endif +; result = (result & 0xffff) + (result >> 16); +; /* add up carry.. */ +; result = (result & 0xffff) + (result >> 16); +; if (odd) +; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +;out: +; return result; +;} +; +; A4: buff +; B4: len +; return checksum in A4 +; + +ENTRY(do_csum) + CMPGT .L2 B4,0,B0 + [!B0] BNOP .S1 L26,3 + EXTU .S1 A4,31,31,A0 + + MV .L1 A0,A3 +|| MV .S1X B3,A5 +|| MV .L2 B4,B3 +|| ZERO .D1 A1 + +#ifdef CONFIG_CPU_BIG_ENDIAN + [A0] SUB .L2 B3,1,B3 +|| [A0] LDBU .D1T1 *A4++,A1 +#else + [!A0] BNOP .S1 L21,5 +|| [A0] LDBU .D1T1 *A4++,A0 + SUB .L2 B3,1,B3 +|| SHL .S1 A0,8,A1 +L21: +#endif + SHR .S2 B3,1,B0 + [!B0] BNOP .S1 L24,3 + MVK .L1 2,A0 + AND .L1 A4,A0,A0 + + [!A0] BNOP .S1 L22,5 +|| [A0] LDHU .D1T1 *A4++,A0 + SUB .L2 B0,1,B0 +|| SUB .S2 B3,2,B3 +|| ADD .L1 A0,A1,A1 +L22: + SHR .S2 B0,1,B0 +|| ZERO .L1 A0 + + [!B0] BNOP .S1 L23,5 +|| [B0] MVC .S2 B0,ILC + + SPLOOP 3 + SPMASK L1 +|| MV .L1 A1,A2 +|| LDW .D1T1 *A4++,A1 + + NOP 4 + ADD .L1 A0,A1,A0 + ADD .L1 A2,A0,A2 + + SPKERNEL 1,2 +|| CMPGTU .L1 A1,A2,A0 + + ADD .L1 A0,A2,A6 + EXTU .S1 A6,16,16,A7 + SHRU .S2X A6,16,B0 + NOP 1 + ADD .L1X A7,B0,A1 +L23: + MVK .L2 2,B0 + AND .L2 B3,B0,B0 + [B0] LDHU .D1T1 *A4++,A0 + NOP 4 + [B0] ADD .L1 A0,A1,A1 +L24: + EXTU .S2 B3,31,31,B0 +#ifdef CONFIG_CPU_BIG_ENDIAN + [!B0] BNOP .S1 L25,4 +|| [B0] LDBU .D1T1 *A4,A0 + SHL .S1 A0,8,A0 + ADD .L1 A0,A1,A1 +L25: +#else + [B0] LDBU .D1T1 *A4,A0 + NOP 4 + [B0] ADD .L1 A0,A1,A1 +#endif + EXTU .S1 A1,16,16,A0 + SHRU .S2X A1,16,B0 + NOP 1 + ADD .L1X A0,B0,A0 + SHRU .S1 A0,16,A1 + ADD .L1 A0,A1,A0 + EXTU .S1 A0,16,16,A1 + EXTU .S1 A1,16,24,A2 + + EXTU .S1 A1,24,16,A0 +|| MV .L2X A3,B0 + + [B0] OR .L1 A0,A2,A1 +L26: + NOP 1 + BNOP .S2X A5,4 + MV .L1 A1,A4 +ENDPROC(do_csum) + +;__wsum csum_partial(const void *buff, int len, __wsum wsum) +;{ +; unsigned int sum = (__force unsigned int)wsum; +; unsigned int result = do_csum(buff, len); +; +; /* add in old sum, and carry.. */ +; result += sum; +; if (sum > result) +; result += 1; +; return (__force __wsum)result; +;} +; +ENTRY(csum_partial) + MV .L1X B3,A9 +|| CALLP .S2 do_csum,B3 +|| MV .S1 A6,A8 + BNOP .S2X A9,2 + ADD .L1 A8,A4,A1 + CMPGTU .L1 A8,A1,A0 + ADD .L1 A1,A0,A4 +ENDPROC(csum_partial) + +;unsigned short +;ip_compute_csum(unsigned char *buff, unsigned int len) +; +; A4: buff +; B4: len +; return checksum in A4 + +ENTRY(ip_compute_csum) + MV .L1X B3,A9 +|| CALLP .S2 do_csum,B3 + BNOP .S2X A9,3 + NOT .S1 A4,A4 + CLR .S1 A4,16,31,A4 +ENDPROC(ip_compute_csum) |