diff options
author | Olof Johansson <olof@lixom.net> | 2012-02-10 12:40:34 -0800 |
---|---|---|
committer | Olof Johansson <olof@lixom.net> | 2012-02-10 12:40:34 -0800 |
commit | 082f53c2f573c75a8f1610c587a43b6817e20f90 (patch) | |
tree | c3ad3a58967e7f7f61ff12ff59284dba1e76d093 /lib/mpi/longlong.h | |
parent | e69128b947da0a2474447868f73c76311e2baedb (diff) | |
parent | b024e33682c8e56cc8e869245dabd1b91ffe00ea (diff) |
Merge branch 'for-arm-soc' of git://sources.calxeda.com/kernel/linux into next/soc
* 'for-arm-soc' of git://sources.calxeda.com/kernel/linux: (247 commits)
ARM: highbank: remove unused memory.h
ARM: highbank: enable sp804 based sched_clock
ARM: timer-sp: add sched_clock support
Linux 3.3-rc3
pcmcia: fix socket refcount decrementing on each resume
mm: fix UP THP spin_is_locked BUGs
drivers/leds/leds-lm3530.c: fix setting pltfm->als_vmax
mm: compaction: check for overlapping nodes during isolation for migration
nilfs2: avoid overflowing segment numbers in nilfs_ioctl_clean_segments()
ASoC: wm8994: Disable line output discharge prior to ramping VMID
ASoC: wm8994: Fix typo in VMID ramp setting
ALSA: oxygen, virtuoso: fix exchanged L/R volumes of aux and CD inputs
ALSA: usb-audio: add Edirol UM-3G support
checkpatch: Warn on code with 6+ tab indentation
ACPI: remove duplicated lines of merging problems with acpi_processor_add
ALSA: hda - add support for Uniwill ECS M31EI notebook
HID: wiimote: fix invalid power_supply_powers call
ALSA: hda - Fix error handling in patch_ca0132.c
target: Fix unsupported WRITE_SAME sense payload
iscsi: use IP_FREEBIND socket option
...
Diffstat (limited to 'lib/mpi/longlong.h')
-rw-r--r-- | lib/mpi/longlong.h | 44 |
1 files changed, 33 insertions, 11 deletions
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index b87487b40a8..29f98624ef9 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -1200,18 +1200,40 @@ do { \ "r" ((USItype)(v)) \ : "%g1", "%g2" __AND_CLOBBER_CC) #define UMUL_TIME 39 /* 39 instructions */ -#endif -#ifndef udiv_qrnnd -#ifndef LONGLONG_STANDALONE +/* It's quite necessary to add this much assembler for the sparc. + The default udiv_qrnnd (in C) is more than 10 times slower! */ #define udiv_qrnnd(q, r, n1, n0, d) \ -do { USItype __r; \ - (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ - (r) = __r; \ -} while (0) - extern USItype __udiv_qrnnd(); -#define UDIV_TIME 140 -#endif /* LONGLONG_STANDALONE */ -#endif /* udiv_qrnnd */ + __asm__ ("! Inlined udiv_qrnnd\n\t" \ + "mov 32,%%g1\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "1: bcs 5f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "addx %1,%1,%1 ! so this can't give carry\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "2: bne 1b\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "bcs 3f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "b 3f\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "4: sub %1,%2,%1\n\t" \ + "5: addxcc %1,%1,%1\n\t" \ + "bcc 2b\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "! Got carry from n. Subtract next step to cancel this carry.\n\t" \ + "bne 4b\n\t" \ + "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ + "sub %1,%2,%1\n\t" \ + "3: xnor %0,0,%0\n\t" \ + "! End of inline udiv_qrnnd\n" \ + : "=&r" ((USItype)(q)), \ + "=&r" ((USItype)(r)) \ + : "r" ((USItype)(d)), \ + "1" ((USItype)(n1)), \ + "0" ((USItype)(n0)) : "%g1", "cc") +#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ +#endif #endif /* __sparc__ */ /*************************************** |