diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-08 01:04:45 +0900 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-09 01:03:59 +0000 |
commit | 773ff78838ca3c07245e45c06235e0baaa5f710a (patch) | |
tree | c238920f34ab310a7a3d426cefbf9ebb1d5ea78c /arch/mips/lib/csum_partial.S | |
parent | 52ffe760ea9ec407292d093c3f06c1cda5187228 (diff) |
[MIPS] Optimize flow of csum_partial
Delete the dead code at the end of the function and move small_csumcpy
there. This makes some labels (maybe_end_cruft, small_memcpy,
end_bytes, out) unnecessary and eliminates some branches.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r-- | arch/mips/lib/csum_partial.S | 129 |
1 files changed, 54 insertions, 75 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 3bffdbb1c1f..b04475d76f3 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -65,64 +65,6 @@ .text .set noreorder - -/* unknown src alignment and < 8 bytes to go */ -small_csumcpy: - move a1, t2 - - andi t0, a1, 4 - beqz t0, 1f - andi t0, a1, 2 - - /* Still a full word to go */ - ulw t1, (src) - PTR_ADDIU src, 4 - ADDC(sum, t1) - -1: move t1, zero - beqz t0, 1f - andi t0, a1, 1 - - /* Still a halfword to go */ - ulhu t1, (src) - PTR_ADDIU src, 2 - -1: beqz t0, 1f - sll t1, t1, 16 - - lbu t2, (src) - nop - -#ifdef __MIPSEB__ - sll t2, t2, 8 -#endif - or t1, t2 - -1: ADDC(sum, t1) - - /* fold checksum */ - sll v1, sum, 16 - addu sum, v1 - sltu v1, sum, v1 - srl sum, sum, 16 - addu sum, v1 - - /* odd buffer alignment? */ - beqz t7, 1f - nop - sll v1, sum, 8 - srl sum, sum, 8 - or sum, v1 - andi sum, 0xffff -1: - .set reorder - /* Add the passed partial csum. */ - ADDC(sum, a2) - jr ra - .set noreorder - -/* ------------------------------------------------------------------------- */ - .align 5 LEAF(csum_partial) move sum, zero @@ -132,8 +74,7 @@ LEAF(csum_partial) bnez t8, small_csumcpy /* < 8 bytes to copy */ move t2, a1 - beqz a1, out - andi t7, src, 0x1 /* odd buffer? */ + andi t7, src, 0x1 /* odd buffer? */ hword_align: beqz t7, word_align @@ -232,8 +173,9 @@ move_32bytes: PTR_ADDU src, src, 0x20 do_end_words: - beqz t8, maybe_end_cruft - LONG_SRL t8, t8, 0x2 + beqz t8, small_csumcpy + andi t2, a1, 0x3 + LONG_SRL t8, t8, 0x2 end_words: lw t0, (src) @@ -242,21 +184,58 @@ end_words: bnez t8, end_words PTR_ADDU src, src, 0x4 -maybe_end_cruft: - andi t2, a1, 0x3 +/* unknown src alignment and < 8 bytes to go */ +small_csumcpy: + move a1, t2 -small_memcpy: - j small_csumcpy; move a1, t2 /* XXX ??? 
*/ - beqz t2, out - move a1, t2 + andi t0, a1, 4 + beqz t0, 1f + andi t0, a1, 2 -end_bytes: - lb t0, (src) - LONG_SUBU a1, a1, 0x1 - bnez a2, end_bytes - PTR_ADDU src, src, 0x1 + /* Still a full word to go */ + ulw t1, (src) + PTR_ADDIU src, 4 + ADDC(sum, t1) + +1: move t1, zero + beqz t0, 1f + andi t0, a1, 1 + + /* Still a halfword to go */ + ulhu t1, (src) + PTR_ADDIU src, 2 + +1: beqz t0, 1f + sll t1, t1, 16 + + lbu t2, (src) + nop + +#ifdef __MIPSEB__ + sll t2, t2, 8 +#endif + or t1, t2 + +1: ADDC(sum, t1) -out: + /* fold checksum */ + sll v1, sum, 16 + addu sum, v1 + sltu v1, sum, v1 + srl sum, sum, 16 + addu sum, v1 + + /* odd buffer alignment? */ + beqz t7, 1f + nop + sll v1, sum, 8 + srl sum, sum, 8 + or sum, v1 + andi sum, 0xffff +1: + .set reorder + /* Add the passed partial csum. */ + ADDC(sum, a2) jr ra - move v0, sum + .set noreorder END(csum_partial) |