From 3370d82f3b3ff04d082a9c343a80019282e41261 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 7 Dec 2010 11:55:06 +0100 Subject: microblaze: Fix __muldi3 function for little-endian. __muldi3 was written for big endian platforms. Code contained half word read/write instructions which are not compatible with little endian cpu. Asm __muldi3 implementation is replaced by C version. Signed-off-by: Michal Simek --- arch/microblaze/lib/muldi3.S | 121 ------------------------------------------- arch/microblaze/lib/muldi3.c | 60 +++++++++++++++++++++ 2 files changed, 60 insertions(+), 121 deletions(-) delete mode 100644 arch/microblaze/lib/muldi3.S create mode 100644 arch/microblaze/lib/muldi3.c diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S deleted file mode 100644 index ceeaa8c407f..00000000000 --- a/arch/microblaze/lib/muldi3.S +++ /dev/null @@ -1,121 +0,0 @@ -#include - -/* - * Multiply operation for 64 bit integers, for devices with hard multiply - * Input : Operand1[H] in Reg r5 - * Operand1[L] in Reg r6 - * Operand2[H] in Reg r7 - * Operand2[L] in Reg r8 - * Output: Result[H] in Reg r3 - * Result[L] in Reg r4 - * - * Explaination: - * - * Both the input numbers are divided into 16 bit number as follows - * op1 = A B C D - * op2 = E F G H - * result = D * H - * + (C * H + D * G) << 16 - * + (B * H + C * G + D * F) << 32 - * + (A * H + B * G + C * F + D * E) << 48 - * - * Only 64 bits of the output are considered - */ - - .text - .globl __muldi3 - .type __muldi3, @function - .ent __muldi3 - -__muldi3: - addi r1, r1, -40 - -/* Save the input operands on the caller's stack */ - swi r5, r1, 44 - swi r6, r1, 48 - swi r7, r1, 52 - swi r8, r1, 56 - -/* Store all the callee saved registers */ - sw r20, r1, r0 - swi r21, r1, 4 - swi r22, r1, 8 - swi r23, r1, 12 - swi r24, r1, 16 - swi r25, r1, 20 - swi r26, r1, 24 - swi r27, r1, 28 - -/* Load all the 16 bit values for A thru H */ - lhui r20, r1, 44 /* A */ - lhui r21, r1, 46 /* B */ - lhui r22, r1, 48 /* C */ - lhui r23, r1, 50 /* D */ - lhui r24, r1, 52 /* E */ - lhui r25, r1, 54 /* F */ - lhui r26, r1, 56 /* G */ - lhui r27, r1, 58 /* H */ - -/* D * H ==> LSB of the result on stack ==> Store1 */ - mul r9, r23, r27 - swi r9, r1, 36 /* Pos2 and Pos3 */ - -/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */ -/* Store the carry generated in position 2 for Pos 3 */ - lhui r11, r1, 36 /* Pos2 */ - mul r9, r22, r27 /* C * H */ - mul r10, r23, r26 /* D * G */ - add r9, r9, r10 - addc r12, r0, r0 - add r9, r9, r11 - addc r12, r12, r0 /* Store the Carry */ - shi r9, r1, 36 /* Store Pos2 */ - swi r9, r1, 32 - lhui r11, r1, 32 - shi r11, r1, 34 /* Store Pos1 */ - -/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */ - mul r9, r21, r27 /* B * H */ - mul r10, r22, r26 /* C * G */ - mul r7, r23, r25 /* D * F */ - add r9, r9, r11 - add r9, r9, r10 - add r9, r9, r7 - swi r9, r1, 32 /* Pos0 and Pos1 */ - -/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */ - lhui r11, r1, 32 /* Pos0 */ - mul r9, r20, r27 /* A * H */ - mul r10, r21, r26 /* B * G */ - mul r7, r22, r25 /* C * F */ - mul r8, r23, r24 /* D * E */ - add r9, r9, r11 - add r9, r9, r10 - add r9, r9, r7 - add r9, r9, r8 - sext16 r9, r9 /* Sign extend the MSB */ - shi r9, r1, 32 - -/* Move results to r3 and r4 */ - lhui r3, r1, 32 - add r3, r3, r12 - shi r3, r1, 32 - lwi r3, r1, 32 /* Hi Part */ - lwi r4, r1, 36 /* Lo Part */ - -/* Restore Callee saved registers */ - lw r20, r1, r0 - lwi r21, r1, 4 - lwi r22, r1, 8 - lwi r23, r1, 12 - lwi r24, r1, 16 - lwi r25, r1, 20 - lwi r26, r1, 24 - lwi r27, r1, 28 - -/* Restore Frame and return */ - rtsd r15, 8 - addi r1, r1, 40 - -.size __muldi3, . - __muldi3 -.end __muldi3 diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c new file mode 100644 index 00000000000..d4860e154d2 --- /dev/null +++ b/arch/microblaze/lib/muldi3.c @@ -0,0 +1,60 @@ +#include + +#include "libgcc.h" + +#define DWtype long long +#define UWtype unsigned long +#define UHWtype unsigned short + +#define W_TYPE_SIZE 32 + +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* If we still don't have umul_ppmm, define it using plain C. */ +#if !defined(umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart(u); \ + __uh = __ll_highpart(u); \ + __vl = __ll_lowpart(v); \ + __vh = __ll_highpart(v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart(__x0); /* this can't give carry */\ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos */ \ + \ + (w1) = __x3 + __ll_highpart(__x1); \ + (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\ + } while (0) +#endif + +#if !defined(__umulsidi3) +#define __umulsidi3(u, v) ({ \ + DWunion __w; \ + umul_ppmm(__w.s.high, __w.s.low, u, v); \ + __w.ll; \ + }) +#endif + +DWtype __muldi3(DWtype u, DWtype v) +{ + const DWunion uu = {.ll = u}; + const DWunion vv = {.ll = v}; + DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)}; + + w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high + + (UWtype) uu.s.high * (UWtype) vv.s.low); + + return w.ll; +} -- cgit v1.2.3-70-g09d2