diff options
33 files changed, 340 insertions, 680 deletions
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index b3775362d..674ed2adb 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -514,6 +514,8 @@ let emit_instr fallthrough i = | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) -> (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %rcx *) ` {emit_string(instr_for_intop op)} %cl, {emit_reg i.res.(0)}\n` + | Lop(Iintop Imulh) -> + ` imulq {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` @@ -523,61 +525,6 @@ let emit_instr fallthrough i = ` incq {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> ` decq {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Idiv, n)) -> - (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *) - let l = Misc.log2 n in - if n = 1 lsl l then begin - ` movq {emit_reg i.arg.(0)}, %rax\n`; - ` addq ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`; - ` testq %rax, %rax\n`; - ` cmovns %rax, {emit_reg i.arg.(0)}\n`; - ` sarq ${emit_int l}, {emit_reg i.res.(0)}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Algorithm: - t = multiply-high-signed(arg, m) - if m < 0, t = t + m - t = shift-right-signed(t, p) - res = t + sign-bit(arg) - *) - ` movabsq ${emit_nativeint m}, %rdx\n`; - ` movq %rcx, %rax\n`; - ` imulq %rdx\n`; (* rdx = high 64 bits of arg * m, signed *) - if m < 0n then - ` addq %rcx, %rdx\n`; - if p > 0 then - ` sarq ${emit_int p}, %rdx\n`; - ` shrq $63, %rcx\n`; - ` addq %rdx, %rcx\n` - end - | Lop(Iintop_imm(Imod, n)) -> - (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *) - let l = Misc.log2 n in - if n = 1 lsl l then begin - ` movq {emit_reg i.arg.(0)}, %rax\n`; - ` testq %rax, %rax\n`; - ` leaq {emit_int(n-1)}(%rax), %rax\n`; (* flags preserved *) - ` cmovns {emit_reg i.arg.(0)}, %rax\n`; - ` andq ${emit_int (-n)}, %rax\n`; - ` subq %rax, {emit_reg i.res.(0)}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Compute quotient as in the Idiv immediate case, but - compute it in rdx, preserving rcx *) - ` movabsq ${emit_nativeint m}, %rdx\n`; - ` movq %rcx, %rax\n`; - ` imulq %rdx\n`; - if m < 0n then - ` addq %rcx, %rdx\n`; - if p > 0 then - ` sarq ${emit_int p}, %rdx\n`; - ` movq %rcx, %rax\n`; - ` shrq $63, %rax\n`; - ` addq %rax, %rdx\n`; - (* Compute remainder via Euclid's equality *) - ` imulq ${emit_int n}, %rdx\n`; - ` subq %rdx, %rcx\n` - end | Lop(Iintop_imm(op, n)) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n` diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp index 468d8334a..cb023bb8c 100644 --- a/asmcomp/amd64/emit_nt.mlp +++ b/asmcomp/amd64/emit_nt.mlp @@ -522,6 +522,8 @@ let emit_instr fallthrough i = | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) -> (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %rcx *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, cl\n` + | Lop(Iintop Imulh) -> + ` imul {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n` @@ -531,61 +533,6 @@ let emit_instr fallthrough i = ` inc {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> ` dec {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Idiv, n)) -> - (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *) - let l = Misc.log2 n in - if n = 1 lsl l then begin - ` mov rax, {emit_reg i.arg.(0)}\n`; - ` add {emit_reg i.arg.(0)}, {emit_int(n-1)}\n`; - ` test rax, rax\n`; - ` cmovns {emit_reg i.arg.(0)}, rax\n`; - ` sar {emit_reg i.res.(0)}, {emit_int l}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Algorithm: - t = multiply-high-signed(arg, m) - if m < 0, t = t + m - t = shift-right-signed(t, p) - res = t + sign-bit(arg) - *) - emit_movabs rdx m; - ` mov rax, rcx\n`; - ` imul rdx\n`; (* rdx = high 64 bits of arg * m, signed *) - if m < 0n then - ` add rdx, rcx\n`; - if p > 0 then - ` sar rdx, {emit_int p}\n`; - ` shr rcx, 63\n`; - ` add rcx, rdx\n` - end - | Lop(Iintop_imm(Imod, n)) -> - (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *) - let l = Misc.log2 n in - if n = 1 lsl l then begin - ` mov rax, {emit_reg i.arg.(0)}\n`; - ` test rax, rax\n`; - ` lea rax, {emit_int(n-1)}[rax]\n`; - ` cmovns rax, {emit_reg i.arg.(0)}\n`; - ` and rax, {emit_int (-n)}\n`; - ` sub {emit_reg i.res.(0)}, rax\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Compute quotient as in the Idiv immediate case, but - compute it in rdx, preserving rcx *) - emit_movabs rdx m; - ` mov rax, rcx\n`; - ` imul rdx\n`; - if m < 0n then - ` add rdx, rcx\n`; - if p > 0 then - ` sar rdx, {emit_int p}\n`; - ` mov rax, rcx\n`; - ` shr rax, 63\n`; - ` add rdx, rax\n`; - (* Compute remainder via Euclid's equality *) - ` imul rdx, {emit_int n}\n`; - ` sub rcx, rdx\n` - end | Lop(Iintop_imm(op, n)) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n` diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml index 838affba6..ddec43db7 100644 --- a/asmcomp/amd64/proc.ml +++ b/asmcomp/amd64/proc.ml @@ -260,7 +260,7 @@ let destroyed_at_oper = function | Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _)) -> [| rax; rdx |] | Iop(Istore(Single, _)) -> [| rxmm15 |] - | Iop(Ialloc _ | Iintop(Icomp _) | Iintop_imm((Icomp _), _)) + | Iop(Ialloc _ | Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _)) -> [| rax |] | Iswitch(_, _) -> [| rax; rdx |] | _ -> diff --git a/asmcomp/amd64/reload.ml b/asmcomp/amd64/reload.ml index 510f201f1..a7cb86028 100644 --- a/asmcomp/amd64/reload.ml +++ b/asmcomp/amd64/reload.ml @@ -32,7 +32,8 @@ open Mach Istore R R Iintop(Icomp) R R S or S S R - Iintop(Imul|Idiv|mod) R R S + Iintop(Imul|Idiv|Imod) R R S + Iintop(Imulh) R R S Iintop(shift) S S R Iintop(others) R R S or S S R @@ -71,10 +72,10 @@ method! reload_operation op arg res = (* This add will be turned into a lea; args and results must be in registers *) super#reload_operation op arg res - | Iintop(Idiv | Imod | Ilsl | Ilsr | Iasr) + | Iintop(Imulh | Idiv | Imod | Ilsl | Ilsr | Iasr) | Iintop_imm(_, _) -> (* The argument(s) and results can be either in register or on stack *) - (* Note: Idiv, Imod: arg(0) and res(0) already forced in regs + (* Note: Imulh, Idiv, Imod: arg(0) and res(0) already forced in regs Ilsl, Ilsr, Iasr: arg(1) already forced in regs *) (arg, res) | Iintop(Imul) | Iaddf | Isubf | Imulf | Idivf -> diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml index 97e0e84de..cce7e575d 100644 --- a/asmcomp/amd64/selection.ml +++ b/asmcomp/amd64/selection.ml @@ -91,6 +91,10 @@ let pseudoregs_for_operation op arg res = (rax, rbx, rcx or rdx). Keep it simple, just force the argument in rax. *) | Ispecific(Ibswap 16) -> ([| rax |], [| rax |]) + (* For imulq, first arg must be in rax, rax is clobbered, and result is in + rdx. *) + | Iintop(Imulh) -> + ([| rax; arg.(1) |], [| rdx |]) | Ispecific(Ifloatarithmem(_,_)) -> let arg' = Array.copy arg in arg'.(0) <- res.(0); @@ -105,10 +109,6 @@ let pseudoregs_for_operation op arg res = ([| rax; rcx |], [| rax |]) | Iintop(Imod) -> ([| rax; rcx |], [| rdx |]) - (* For div and mod with immediate operand, arg must not be in rax nor rdx. - Keep it simple, force it in rcx. *) - | Iintop_imm((Idiv|Imod), _) -> - ([| rcx |], [| rcx |]) (* Other instructions are regular *) | _ -> raise Use_default @@ -176,19 +176,6 @@ method! select_operation op args = | (Iindexed2 0, _) -> super#select_operation op args | (addr, arg) -> (Ispecific(Ilea addr), [arg]) end - (* Recognize (x / cst) and (x % cst) only if cst is > 0. *) - | Cdivi -> - begin match args with - [arg1; Cconst_int n] when self#is_immediate n && n > 0 -> - (Iintop_imm(Idiv, n), [arg1]) - | _ -> (Iintop Idiv, args) - end - | Cmodi -> - begin match args with - [arg1; Cconst_int n] when self#is_immediate n && n > 0 -> - (Iintop_imm(Imod, n), [arg1]) - | _ -> (Iintop Imod, args) - end (* Recognize float arithmetic with memory. *) | Caddf -> self#select_floatarith true Iaddf Ifloatadd args @@ -225,6 +212,9 @@ method! select_operation op args = | Cextcall("caml_int64_direct_bswap", _, _, _) | Cextcall("caml_nativeint_direct_bswap", _, _, _) -> (Ispecific (Ibswap 64), args) + (* AMD64 does not support immediate operands for multiply high signed *) + | Cmulhi -> + (Iintop Imulh, args) | _ -> super#select_operation op args (* Recognize float arithmetic with mem *) diff --git a/asmcomp/arm/arch.ml b/asmcomp/arm/arch.ml index bdbac71bf..fbd9f6db0 100644 --- a/asmcomp/arm/arch.ml +++ b/asmcomp/arm/arch.ml @@ -110,6 +110,7 @@ type specific_operation = Ishiftarith of arith_operation * shift_operation * int | Ishiftcheckbound of shift_operation * int | Irevsubimm of int + | Imulhadd (* multiply high and add *) | Imuladd (* multiply and add *) | Imulsub (* multiply and subtract *) | Inegmulf (* floating-point negate and multiply *) @@ -193,6 +194,11 @@ let print_specific_operation printreg op ppf arg = printreg arg.(1) | Irevsubimm n -> fprintf ppf "%i %s %a" n "-" printreg arg.(0) + | Imulhadd -> + fprintf ppf "%a *h %a) + %a" + printreg arg.(0) + printreg arg.(1) + printreg arg.(2) | Imuladd -> fprintf ppf "(%a * %a) + %a" printreg arg.(0) diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp index 4fd0ed2f7..55a8f96b0 100644 --- a/asmcomp/arm/emit.mlp +++ b/asmcomp/arm/emit.mlp @@ -174,15 +174,16 @@ let name_for_comparison = function let name_for_int_operation = function (* Use adds,subs,... to enable 16-bit T1 encoding *) - Iadd -> "adds" - | Isub -> "subs" - | Imul -> "mul" - | Iand -> "ands" - | Ior -> "orrs" - | Ixor -> "eors" - | Ilsl -> "lsls" - | Ilsr -> "lsrs" - | Iasr -> "asrs" + Iadd -> "adds" + | Isub -> "subs" + | Imul -> "mul" + | Imulh -> "smmul" + | Iand -> "ands" + | Ior -> "orrs" + | Ixor -> "eors" + | Ilsl -> "lsls" + | Ilsr -> "lsrs" + | Iasr -> "asrs" | _ -> assert false let name_for_shift_operation = function @@ -593,91 +594,16 @@ let emit_instr i = let op = name_for_shift_operation shiftop in ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, {emit_string op} #{emit_int n}\n`; ` bcs {emit_label lbl}\n`; 2 + | Lop(Iintop Imulh) when !arch < ARMv6 -> + ` smull r12, {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1 + | Lop(Ispecific Imulhadd) -> + ` smmla {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1 | Lop(Iintop op) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1 - | Lop(Iintop_imm(Idiv, n)) -> - let l = Misc.log2 n in - let a = i.arg.(0) in - let r = i.res.(0) in - if n = 1 lsl l then begin - ` movs {emit_reg r}, {emit_reg a}\n`; - if n <= 256 then begin - ` it lt\n`; - ` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n` - end else begin - ` itt lt\n`; - ` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`; - ` sublt {emit_reg r}, {emit_reg r}, #1\n` - end; - (* Use movs to enable 16-bit T1 encoding *) - ` movs {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5 - end else begin - assert (!arch >= ARMv6); - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Algorithm: - t = multiply-high-signed(arg, m) - if m < 0, t = t + arg - t = shift-right-signed(t, p) - res = t + sign-bit(arg) - *) - let ninstr = emit_intconst r (Nativeint.to_int32 m) in - if m >= 0n then - ` smmul {emit_reg r}, {emit_reg r}, {emit_reg a}\n` - else - ` smmla {emit_reg r}, {emit_reg r}, {emit_reg a}, {emit_reg a}\n`; - if p > 0 then - ` movs {emit_reg r}, {emit_reg r}, asr #{emit_int p}\n`; - ` add {emit_reg r}, {emit_reg r}, {emit_reg a}, lsr #31\n`; - ninstr + 3 - end - | Lop(Iintop_imm(Imod, n)) -> - let l = Misc.log2 n in - let a = i.arg.(0) in - let r = i.res.(0) in - if n = 1 lsl l then begin - let lbl = new_label() in - ` cmp {emit_reg a}, #0\n`; - ` mov {emit_reg r}, {emit_reg a}, lsl #{emit_int (32-l)}\n`; - ` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`; - ` bpl {emit_label lbl}\n`; - ` cmp {emit_reg r}, #0\n`; - ` it ne\n`; - ` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`; - `{emit_label lbl}:\n`; 7 - end else begin - assert (!arch >= ARMv6); - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Algorithm: - t = multiply-high-signed(arg, m) - if m < 0, t = t + arg - t = shift-right-signed(t, p) - t = t + sign-bit(arg) - res = arg - (t * n) - *) - let r12 = phys_reg 8 in - let ninstr = emit_intconst r (Nativeint.to_int32 m) in - if m >= 0n then - ` smmul {emit_reg r}, {emit_reg r}, {emit_reg a}\n` - else - ` smmla {emit_reg r}, {emit_reg r}, {emit_reg a}, {emit_reg a}\n`; - if p > 0 then - ` movs {emit_reg r}, {emit_reg r}, asr #{emit_int p}\n`; - ` add {emit_reg r}, {emit_reg r}, {emit_reg a}, lsr #31\n`; - (* Compute remainder via Euclid's equality *) - let ninstr = ninstr + emit_intconst r12 (Int32.of_int n) in - if !arch >= ARMv6T2 then begin - ` mls {emit_reg r}, {emit_reg r}, r12, {emit_reg a}\n`; - ninstr + 4 - end else begin - ` mul {emit_reg r}, {emit_reg r}, r12\n`; - ` sub {emit_reg r}, {emit_reg a}, {emit_reg r}\n`; - ninstr + 5 - end - end | Lop(Iintop_imm(op, n)) -> let instr = name_for_int_operation op in - ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1 + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1 | Lop(Iabsf | Inegf as op) when !fpu = Soft -> let instr = (match op with Iabsf -> "bic" diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml index 48555a5c2..a16c35a22 100644 --- a/asmcomp/arm/proc.ml +++ b/asmcomp/arm/proc.ml @@ -201,7 +201,7 @@ let destroyed_at_oper = function destroyed_at_alloc | Iop(Iconst_symbol _) when !pic_code -> [| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *) - | Iop(Iintop_imm(Imod, n)) when !arch >= ARMv6 && n <> 1 lsl Misc.log2 n -> + | Iop(Iintop Imulh) when !arch < ARMv6 -> [| phys_reg 8 |] (* r12 destroyed *) | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) -> [| phys_reg 107 |] (* d7 (s14-s15) destroyed *) diff --git a/asmcomp/arm/scheduling.ml b/asmcomp/arm/scheduling.ml index 9e2d65bc6..0d6618ab9 100644 --- a/asmcomp/arm/scheduling.ml +++ b/asmcomp/arm/scheduling.ml @@ -31,8 +31,8 @@ method oper_latency = function | Ifloatofint (* mcr/mrc count as memory access *) | Iintoffloat -> 2 (* Multiplys have a latency of two cycles *) - | Iintop Imul - | Ispecific(Imuladd | Imulsub) -> 2 + | Iintop (Imul | Imulh) + | Ispecific(Imuladd | Imulsub | Imulhadd) -> 2 (* VFP instructions *) | Iaddf | Isubf @@ -58,10 +58,8 @@ method oper_issue_cycles = function | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> 2 | Ispecific(Ishiftcheckbound _) -> 3 - | Iintop_imm(Idiv, _) -> 4 - | Iintop_imm(Imod, _) -> 6 - | Iintop Imul - | Ispecific(Imuladd | Imulsub) -> 2 + | Iintop(Imul | Imulh) + | Ispecific(Imuladd | Imulsub | Imulhadd) -> 2 (* VFP instructions *) | Iaddf | Isubf -> 7 diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml index 474f1a391..9cd6090cd 100644 --- a/asmcomp/arm/selection.ml +++ b/asmcomp/arm/selection.ml @@ -57,15 +57,11 @@ let pseudoregs_for_operation op arg res = is also a result of the mul / mla operation. *) Iintop Imul | Ispecific Imuladd when !arch < ARMv6 -> (arg, [| res.(0); arg.(0) |]) - (* For integer division by a constant, which is not a power of 2, on ARMv6 - and later, the result and argument registers must be different. We deal - with this by pretending that the argument value is also a result of the - operation. For modulus we also require a scratch register (r12) that - is different from both the result and argument registers. *) - | Iintop_imm(Idiv, n) when !arch >= ARMv6 && n <> 1 lsl Misc.log2 n-> + (* For smull rdlo,rdhi,rn,rm (pre-ARMv6) the registers rdlo, rdhi and rn + must be different. We deal with this by pretending that rn is also a + result of the smull operation. *) + | Iintop Imulh when !arch < ARMv6 -> (arg, [| res.(0); arg.(0) |]) - | Iintop_imm(Imod, n) when !arch >= ARMv6 && n <> 1 lsl Misc.log2 n-> - (arg, [| res.(0); arg.(0); r12 |]) (* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *) | Iabsf | Inegf when !fpu = Soft -> ([|res.(0); arg.(1)|], res) @@ -132,8 +128,17 @@ method select_shift_arith op arithop arithrevop args = (Ispecific(Ishiftarith(arithrevop, select_shiftop op, n)), [arg2; arg1]) | args -> begin match super#select_operation op args with + (* Recognize multiply high and add *) + (Iintop Iadd, [Cop(Cmulhi, args); arg3]) + | (Iintop Iadd, [arg3; Cop(Cmulhi, args)]) as op_args + when !arch >= ARMv6 -> + begin match self#select_operation Cmulhi args with + (Iintop Imulh, [arg1; arg2]) -> + (Ispecific Imulhadd, [arg1; arg2; arg3]) + | _ -> op_args + end (* Recognize multiply and add *) - (Iintop Iadd, [Cop(Cmuli, args); arg3]) + | (Iintop Iadd, [Cop(Cmuli, args); arg3]) | (Iintop Iadd, [arg3; Cop(Cmuli, args)]) as op_args -> begin match self#select_operation Cmuli args with (Iintop Imul, [arg1; arg2]) -> @@ -179,15 +184,11 @@ method! select_operation op args = (* ARM does not support immediate operands for multiplication *) | (Cmuli, args) -> (Iintop Imul, args) + | (Cmulhi, args) -> + (Iintop Imulh, args) (* Turn integer division/modulus into runtime ABI calls *) - | (Cdivi, [arg; Cconst_int n]) when n > 0 && (!arch >= ARMv6 - || n = 1 lsl Misc.log2 n) -> - (Iintop_imm(Idiv, n), [arg]) | (Cdivi, args) -> (Iextcall("__aeabi_idiv", false), args) - | (Cmodi, [arg; Cconst_int n]) when n > 0 && (!arch >= ARMv6 - || n = 1 lsl Misc.log2 n) -> - (Iintop_imm(Imod, n), [arg]) | (Cmodi, args) -> (* See above for fix up of return register *) (Iextcall("__aeabi_idivmod", false), args) diff --git a/asmcomp/arm64/arch.ml b/asmcomp/arm64/arch.ml index a53251f51..bfbe183fb 100644 --- a/asmcomp/arm64/arch.ml +++ b/asmcomp/arm64/arch.ml @@ -143,4 +143,3 @@ let print_specific_operation printreg op ppf arg = | Ibswap n -> fprintf ppf "bswap%i %a" n printreg arg.(0) - diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp index 207d5b8ef..bc03c5d52 100644 --- a/asmcomp/arm64/emit.mlp +++ b/asmcomp/arm64/emit.mlp @@ -454,41 +454,8 @@ let emit_instr i = | Lop(Iintop Imod) -> ` sdiv {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` msub {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` - | Lop(Iintop_imm(Idiv, n)) -> - let l = Misc.log2 n in - if n = 1 lsl l then begin - ` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`; - ` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`; - ` asr {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_int l}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - emit_intconst reg_tmp1 m; - ` smulh {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}\n`; - if m < 0n then - ` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}\n`; - if p > 0 then - ` asr {emit_reg reg_tmp1}, #{emit_int p}\n`; - ` add {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsr 63\n` - end - | Lop(Iintop_imm(Imod, n)) -> - let l = Misc.log2 n in - if n = 1 lsl l then begin - ` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`; - ` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`; - ` asr {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_int l}\n`; - ` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsl {emit_int l}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - emit_intconst reg_tmp1 m; - ` smulh {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}\n`; - if m < 0n then - ` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}\n`; - if p > 0 then - ` asr {emit_reg reg_tmp1}, #{emit_int p}\n`; - ` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsr 63\n`; - emit_intconst reg_tmp2 (Nativeint.of_int n); - ` msub {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg reg_tmp2}, {emit_reg i.arg.(0)}\n` - end + | Lop(Iintop Imulh) -> + ` smulh {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` diff --git a/asmcomp/arm64/selection.ml b/asmcomp/arm64/selection.ml index 6af158f70..36b401894 100644 --- a/asmcomp/arm64/selection.ml +++ b/asmcomp/arm64/selection.ml @@ -184,31 +184,9 @@ method! select_operation op args = (* Integer multiplication *) (* ARM does not support immediate operands for multiplication *) | Cmuli -> - begin match args with - | [arg; Cconst_int n] | [Cconst_int n; arg] -> - let l = Misc.log2 n in - if n = 1 lsl l - then (Iintop_imm(Ilsl, l), [arg]) - else (Iintop Imul, args) - | _ -> - (Iintop Imul, args) - end - (* Division and modulus *) - (* Recognize (x / cst) and (x % cst) only if cst is > 0. *) - | Cdivi -> - begin match args with - | [arg; Cconst_int n] when n > 0 -> - ((if n = 1 then Imove else Iintop_imm(Idiv, n)), [arg]) - | _ -> - (Iintop Idiv, args) - end - | Cmodi -> - begin match args with - | [arg; Cconst_int n] when n > 0 -> - ((if n = 1 then Iconst_int 0n else Iintop_imm(Imod, n)), [arg]) - | _ -> - (Iintop Imod, args) - end + (Iintop Imul, args) + | Cmulhi -> + (Iintop Imulh, args) (* Bitwise logical operations have a different range of immediate operands than the other instructions *) | Cand -> self#select_logical Iand args diff --git a/asmcomp/cmm.ml b/asmcomp/cmm.ml index fc763c8ae..cdb833896 100644 --- a/asmcomp/cmm.ml +++ b/asmcomp/cmm.ml @@ -70,7 +70,7 @@ type operation = | Cload of memory_chunk | Calloc | Cstore of memory_chunk - | Caddi | Csubi | Cmuli | Cdivi | Cmodi + | Caddi | Csubi | Cmuli | Cmulhi | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr | Ccmpi of comparison | Cadda | Csuba diff --git a/asmcomp/cmm.mli b/asmcomp/cmm.mli index 9a6ebd1e4..2ae9eb658 100644 --- a/asmcomp/cmm.mli +++ b/asmcomp/cmm.mli @@ -56,7 +56,7 @@ type operation = | Cload of memory_chunk | Calloc | Cstore of memory_chunk - | Caddi | Csubi | Cmuli | Cdivi | Cmodi + | Caddi | Csubi | Cmuli | Cmulhi | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr | Ccmpi of comparison | Cadda | Csuba diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml index 6a1deeda1..591822f56 100644 --- a/asmcomp/cmmgen.ml +++ b/asmcomp/cmmgen.ml @@ -136,26 +136,6 @@ let mul_int c1 c2 = | (c1, c2) -> Cop(Cmuli, [c1; c2]) -let tag_int = function - Cconst_int n -> int_const n - | c -> Cop(Caddi, [Cop(Clsl, [c; Cconst_int 1]); Cconst_int 1]) - -let force_tag_int = function - Cconst_int n -> int_const n - | c -> Cop(Cor, [Cop(Clsl, [c; Cconst_int 1]); Cconst_int 1]) - -let untag_int = function - Cconst_int n -> Cconst_int(n asr 1) - | Cop(Caddi, [Cop(Clsl, [c; Cconst_int 1]); Cconst_int 1]) -> c - | Cop(Cor, [Cop(Casr, [c; Cconst_int n]); Cconst_int 1]) - when n > 0 && n < size_int * 8 -> - Cop(Casr, [c; Cconst_int (n+1)]) - | Cop(Cor, [Cop(Clsr, [c; Cconst_int n]); Cconst_int 1]) - when n > 0 && n < size_int * 8 -> - Cop(Clsr, [c; Cconst_int (n+1)]) - | Cop(Cor, [c; Cconst_int 1]) -> Cop(Casr, [c; Cconst_int 1]) - | c -> Cop(Casr, [c; Cconst_int 1]) - let lsl_int c1 c2 = match (c1, c2) with (Cop(Clsl, [c; Cconst_int n1]), Cconst_int n2) @@ -172,64 +152,251 @@ let ignore_low_bit_int = function let lsr_int c1 c2 = match c2 with - (Cconst_int n) when n > 0 -> - Cop(Clsr, [ignore_low_bit_int c1; c2]) + Cconst_int 0 -> + c1 + | Cconst_int n when n > 0 -> + Cop(Clsr, [ignore_low_bit_int c1; c2]) | _ -> - Cop(Clsr, [c1; c2]) + Cop(Clsr, [c1; c2]) let asr_int c1 c2 = match c2 with - (Cconst_int n) when n > 0 -> - Cop(Casr, [ignore_low_bit_int c1; c2]) + Cconst_int 0 -> + c1 + | Cconst_int n when n > 0 -> + Cop(Casr, [ignore_low_bit_int c1; c2]) | _ -> - Cop(Casr, [c1; c2]) + Cop(Casr, [c1; c2]) -(* Division or modulo on tagged integers. The overflow case min_int / -1 - cannot occur, but we must guard against division by zero. *) +let tag_int = function + Cconst_int n -> + int_const n + | Cop(Casr, [c; Cconst_int n]) when n > 0 -> + Cop(Cor, [asr_int c (Cconst_int (n - 1)); Cconst_int 1]) + | c -> + incr_int (lsl_int c (Cconst_int 1)) -let is_different_from x = function - Cconst_int n -> n <> x - | Cconst_natint n -> n <> Nativeint.of_int x - | _ -> false +let force_tag_int = function + Cconst_int n -> + int_const n + | Cop(Casr, [c; Cconst_int n]) when n > 0 -> + Cop(Cor, [asr_int c (Cconst_int (n - 1)); Cconst_int 1]) + | c -> + Cop(Cor, [lsl_int c (Cconst_int 1); Cconst_int 1]) -let safe_divmod op c1 c2 dbg = - if !Clflags.fast || is_different_from 0 c2 then - Cop(op, [c1; c2]) - else - bind "divisor" c2 (fun c2 -> - Cifthenelse(c2, - Cop(op, [c1; c2]), - Cop(Craise (Raise_regular, dbg), - [Cconst_symbol "caml_exn_Division_by_zero"]))) +let untag_int = function + Cconst_int n -> Cconst_int(n asr 1) + | Cop(Caddi, [Cop(Clsl, [c; Cconst_int 1]); Cconst_int 1]) -> c + | Cop(Cor, [Cop(Casr, [c; Cconst_int n]); Cconst_int 1]) + when n > 0 && n < size_int * 8 -> + Cop(Casr, [c; Cconst_int (n+1)]) + | Cop(Cor, [Cop(Clsr, [c; Cconst_int n]); Cconst_int 1]) + when n > 0 && n < size_int * 8 -> + Cop(Clsr, [c; Cconst_int (n+1)]) + | Cop(Cor, [c; Cconst_int 1]) -> Cop(Casr, [c; Cconst_int 1]) + | c -> Cop(Casr, [c; Cconst_int 1]) + +(* Turning integer divisions into multiply-high then shift. + The [division_parameters] function is used in module Emit for + those target platforms that support this optimization. *) + +(* Unsigned comparison between native integers. *) + +let ucompare x y = Nativeint.(compare (add x min_int) (add y min_int)) + +(* Unsigned division and modulus at type nativeint. + Algorithm: Hacker's Delight section 9.3 *) + +let udivmod n d = Nativeint.( + if d < 0n then + if ucompare n d < 0 then (0n, n) else (1n, sub n d) + else begin + let q = shift_left (div (shift_right_logical n 1) d) 1 in + let r = sub n (mul q d) in + if ucompare r d >= 0 then (succ q, sub r d) else (q, r) + end) + +(* Compute division parameters. + Algorithm: Hacker's Delight chapter 10, fig 10-1. *) + +let divimm_parameters d = Nativeint.( + assert (d > 0n); + let twopsm1 = min_int in (* 2^31 for 32-bit archs, 2^63 for 64-bit archs *) + let nc = sub (pred twopsm1) (snd (udivmod twopsm1 d)) in + let rec loop p (q1, r1) (q2, r2) = + let p = p + 1 in + let q1 = shift_left q1 1 and r1 = shift_left r1 1 in + let (q1, r1) = + if ucompare r1 nc >= 0 then (succ q1, sub r1 nc) else (q1, r1) in + let q2 = shift_left q2 1 and r2 = shift_left r2 1 in + let (q2, r2) = + if ucompare r2 d >= 0 then (succ q2, sub r2 d) else (q2, r2) in + let delta = sub d r2 in + if ucompare q1 delta < 0 || (q1 = delta && r1 = 0n) + then loop p (q1, r1) (q2, r2) + else (succ q2, p - size) + in loop (size - 1) (udivmod twopsm1 nc) (udivmod twopsm1 d)) + +(* The result [(m, p)] of [divimm_parameters d] satisfies the following + inequality: + + 2^(wordsize + p) < m * d <= 2^(wordsize + p) + 2^(p + 1) (i) + + from which it follows that + + floor(n / d) = floor(n * m / 2^(wordsize+p)) + if 0 <= n < 2^(wordsize-1) + ceil(n / d) = floor(n * m / 2^(wordsize+p)) + 1 + if -2^(wordsize-1) <= n < 0 + + The correctness condition (i) above can be checked by the code below. + It was exhaustively tested for values of d from 2 to 10^9 in the + wordsize = 64 case. + +let add2 (xh, xl) (yh, yl) = + let zl = add xl yl and zh = add xh yh in + ((if ucompare zl xl < 0 then succ zh else zh), zl) + +let shl2 (xh, xl) n = + assert (0 < n && n < size + size); + if n < size + then (logor (shift_left xh n) (shift_right_logical xl (size - n)), + shift_left xl n) + else (shift_left xl (n - size), 0n) + +let mul2 x y = + let halfsize = size / 2 in + let halfmask = pred (shift_left 1n halfsize) in + let xl = logand x halfmask and xh = shift_right_logical x halfsize in + let yl = logand y halfmask and yh = shift_right_logical y halfsize in + add2 (mul xh yh, 0n) + (add2 (shl2 (0n, mul xl yh) halfsize) + (add2 (shl2 (0n, mul xh yl) halfsize) + (0n, mul xl yl))) + +let ucompare2 (xh, xl) (yh, yl) = + let c = ucompare xh yh in if c = 0 then ucompare xl yl else c + +let validate d m p = + let md = mul2 m d in + let one2 = (0n, 1n) in + let twoszp = shl2 one2 (size + p) in + let twop1 = shl2 one2 (p + 1) in + ucompare2 twoszp md < 0 && ucompare2 md (add2 twoszp twop1) <= 0 +*) + +let rec div_int c1 c2 dbg = + match (c1, c2) with + (c1, Cconst_int 0) -> + Csequence(c1, Cop(Craise (Raise_regular, dbg), + [Cconst_symbol "caml_exn_Division_by_zero"])) + | (c1, Cconst_int 1) -> + c1 + | (Cconst_int 0 as c1, c2) -> + Csequence(c2, c1) + | (Cconst_int n1, Cconst_int n2) -> + Cconst_int (n1 / n2) + | (c1, Cconst_int n) when n <> min_int -> + let l = Misc.log2 n in + if n = 1 lsl l then + (* Algorithm: + t = shift-right-signed(c1, l - 1) + t = shift-right(t, W - l) + t = c1 + t + res = shift-right-signed(c1 + t, l) + *) + Cop(Casr, [bind "dividend" c1 (fun c1 -> + let t = asr_int c1 (Cconst_int (l - 1)) in + let t = lsr_int t (Cconst_int (Nativeint.size - l)) in + add_int c1 t); + Cconst_int l]) + else if n < 0 then + sub_int (Cconst_int 0) (div_int c1 (Cconst_int (-n)) dbg) + else begin + let (m, p) = divimm_parameters (Nativeint.of_int n) in + (* Algorithm: + t = multiply-high-signed(c1, m) + if m < 0, t = t + c1 + if p > 0, t = shift-right-signed(t, p) + res = t + sign-bit(c1) + *) + bind "dividend" c1 (fun c1 -> + let t = Cop(Cmulhi, [c1; Cconst_natint m]) in + let t = if m < 0n then Cop(Caddi, [t; c1]) else t in + let t = if p > 0 then Cop(Casr, [t; Cconst_int p]) else t in + add_int t (lsr_int c1 (Cconst_int (Nativeint.size - 1)))) + end + | (c1, c2) when !Clflags.fast -> + Cop(Cdivi, [c1; c2]) + | (c1, c2) -> + bind "divisor" c2 (fun c2 -> + Cifthenelse(c2, + Cop(Cdivi, [c1; c2]), + Cop(Craise (Raise_regular, dbg), + [Cconst_symbol "caml_exn_Division_by_zero"]))) + +let mod_int c1 c2 dbg = + match (c1, c2) with + (c1, Cconst_int 0) -> + Csequence(c1, Cop(Craise (Raise_regular, dbg), + [Cconst_symbol "caml_exn_Division_by_zero"])) + | (c1, Cconst_int 1) -> + c1 + | (Cconst_int(0 | 1) as c1, c2) -> + Csequence(c2, c1) + | (Cconst_int n1, Cconst_int n2) -> + Cconst_int (n1 mod n2) + | (c1, (Cconst_int n as c2)) when n <> min_int -> + let l = Misc.log2 n in + if n = 1 lsl l then + (* Algorithm: + t = shift-right-signed(c1, l - 1) + t = shift-right(t, W - l) + t = c1 + t + t = bit-and(t, -n) + res = c1 - t + *) + bind "dividend" c1 (fun c1 -> + let t = asr_int c1 (Cconst_int (l - 1)) in + let t = lsr_int t (Cconst_int (Nativeint.size - l)) in + let t = add_int c1 t in + let t = Cop(Cand, [t; Cconst_int (-n)]) in + sub_int c1 t) + else + bind "dividend" c1 (fun c1 -> + sub_int c1 (mul_int (div_int c1 c2 dbg) c2)) + | (c1, c2) when !Clflags.fast -> + Cop(Cmodi, [c1; c2]) + | (c1, c2) -> + bind "divisor" c2 (fun c2 -> + Cifthenelse(c2, + Cop(Cmodi, [c1; c2]), + Cop(Craise (Raise_regular, dbg), + [Cconst_symbol "caml_exn_Division_by_zero"]))) (* Division or modulo on boxed integers. The overflow case min_int / -1 can occur, in which case we force x / -1 = -x and x mod -1 = 0. (PR#5513). *) +let is_different_from x = function + Cconst_int n -> n <> x + | Cconst_natint n -> n <> Nativeint.of_int x + | _ -> false + let safe_divmod_bi mkop mkm1 c1 c2 bi dbg = bind "dividend" c1 (fun c1 -> bind "divisor" c2 (fun c2 -> - let c3 = - if Arch.division_crashes_on_overflow - && (size_int = 4 || bi <> Pint32) - && not (is_different_from (-1) c2) - then - Cifthenelse(Cop(Ccmpi Cne, [c2; Cconst_int(-1)]), mkop c1 c2, mkm1 c1) - else - mkop c1 c2 in - if !Clflags.fast || is_different_from 0 c2 then - c3 - else - Cifthenelse(c2, c3, - Cop(Craise (Raise_regular, dbg), - [Cconst_symbol "caml_exn_Division_by_zero"])))) + let c = mkop c1 c2 dbg in + if Arch.division_crashes_on_overflow + && (size_int = 4 || bi <> Pint32) + && not (is_different_from (-1) c2) + then Cifthenelse(Cop(Ccmpi Cne, [c2; Cconst_int(-1)]), c, mkm1 c1) + else c)) let safe_div_bi = - safe_divmod_bi (fun c1 c2 -> Cop(Cdivi, [c1;c2])) - (fun c1 -> Cop(Csubi, [Cconst_int 0; c1])) + safe_divmod_bi div_int (fun c1 -> Cop(Csubi, [Cconst_int 0; c1])) let safe_mod_bi = - safe_divmod_bi (fun c1 c2 -> Cop(Cmodi, [c1;c2])) - (fun c1 -> Cconst_int 0) + safe_divmod_bi mod_int (fun c1 -> Cconst_int 0) (* Bool *) @@ -1426,11 +1593,9 @@ and transl_prim_2 p arg1 arg2 dbg = | Pmulint -> incr_int(mul_int (decr_int(transl arg1)) (untag_int(transl arg2))) | Pdivint -> - tag_int(safe_divmod Cdivi (untag_int(transl arg1)) - (untag_int(transl arg2)) dbg) + tag_int(div_int (untag_int(transl arg1)) (untag_int(transl arg2)) dbg) | Pmodint -> - tag_int(safe_divmod Cmodi (untag_int(transl arg1)) - (untag_int(transl arg2)) dbg) + tag_int(mod_int (untag_int(transl arg1)) (untag_int(transl arg2)) dbg) | Pandint -> Cop(Cand, [transl arg1; transl arg2]) | Porint -> diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp index a199e9fb5..3c77529ab 100644 --- a/asmcomp/i386/emit.mlp +++ b/asmcomp/i386/emit.mlp @@ -615,6 +615,8 @@ let emit_instr fallthrough i = | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) -> (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *) ` {emit_string(instr_for_intop op)} %cl, {emit_reg i.res.(0)}\n` + | Lop(Iintop Imulh) -> + ` imull {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` @@ -624,62 +626,6 @@ let emit_instr fallthrough i = ` incl {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> ` decl {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Idiv, n)) -> - (* Note: i.arg.(0) = i.res.(0) = ecx (cf. selection.ml) *) - let l = Misc.log2 n in - if n = 1 lsl l then begin - let lbl = new_label() in - output_test_zero i.arg.(0); - ` jge {emit_label lbl}\n`; - ` addl ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`; - `{emit_label lbl}: sarl ${emit_int l}, {emit_reg i.arg.(0)}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Algorithm: - t = multiply-high-signed(arg, m) - if m < 0, t = t + m - t = shift-right-signed(t, p) - res = t + sign-bit(arg) - *) - ` movl ${emit_nativeint m}, %edx\n`; - ` movl %ecx, %eax\n`; - ` imull %edx\n`; (* edx = high 32 bits of arg * m, signed *) - if m < 0n then - ` addl %ecx, %edx\n`; - if p > 0 then - ` sarl ${emit_int p}, %edx\n`; - ` shrl $31, %ecx\n`; - ` addl %edx, %ecx\n` - end - | Lop(Iintop_imm(Imod, n)) -> - (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *) - let l = Misc.log2 n in - if n = 1 lsl l then begin - let lbl = new_label() in - ` movl {emit_reg i.arg.(0)}, %eax\n`; - ` testl %eax, %eax\n`; - ` jge {emit_label lbl}\n`; - ` addl ${emit_int(n-1)}, %eax\n`; - `{emit_label lbl}: andl ${emit_int(-n)}, %eax\n`; - ` subl %eax, {emit_reg i.arg.(0)}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Compute quotient as in the Idiv immediate case, but - compute it in edx, preserving ecx *) - ` movl ${emit_nativeint m}, %edx\n`; - ` movl %ecx, %eax\n`; - ` imull %edx\n`; - if m < 0n then - ` addl %ecx, %edx\n`; - if p > 0 then - ` sarl ${emit_int p}, %edx\n`; - ` movl %ecx, %eax\n`; - ` shrl $31, %eax\n`; - ` addl %eax, %edx\n`; - (* Compute remainder via Euclid's equality *) - ` imull ${emit_int n}, %edx\n`; - ` subl %edx, %ecx\n` - end | Lop(Iintop_imm(op, n)) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n` diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp index 79a127812..145241d95 100644 --- a/asmcomp/i386/emit_nt.mlp +++ b/asmcomp/i386/emit_nt.mlp @@ -565,6 +565,8 @@ let emit_instr i = | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) -> (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, cl\n` + | Lop(Iintop Imulh) -> + ` imul {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n` @@ -574,61 +576,6 @@ let emit_instr i = ` inc {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> ` dec {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Idiv, n)) -> - (* Note: i.arg.(0) = i.res.(0) = ecx (cf. selection.ml) *) - let l = Misc.log2 n in - if n = 1 lsl l then begin - let lbl = new_label() in - output_test_zero i.arg.(0); - ` jge {emit_label lbl}\n`; - ` add {emit_reg i.arg.(0)}, {emit_int(n-1)}\n`; - `{emit_label lbl}: sar {emit_reg i.arg.(0)}, {emit_int l}\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Algorithm: - t = multiply-high-signed(arg, m) - if m < 0, t = t + m - t = shift-right-signed(t, p) - res = t + sign-bit(arg) - *) - ` mov edx, {emit_nativeint m}\n`; - ` mov eax, ecx\n`; - ` imul edx\n`; (* edx = high 32 bits of arg * m, signed *) - if m < 0n then - ` add edx, ecx\n`; - if p > 0 then - ` sar edx, {emit_int p}\n`; - ` shr ecx, 31\n`; - ` add ecx, edx\n` - end - | Lop(Iintop_imm(Imod, n)) -> - let l = Misc.log2 n in - if n = 1 lsl l then begin - let lbl = new_label() in - ` mov eax, {emit_reg i.arg.(0)}\n`; - ` test eax, eax\n`; - ` jge {emit_label lbl}\n`; - ` add eax, {emit_int(n-1)}\n`; - `{emit_label lbl}: and eax, {emit_int(-n)}\n`; - ` sub {emit_reg i.arg.(0)}, eax\n` - end else begin - let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in - (* Compute quotient as in the Idiv immediate case, but - compute it in edx, preserving ecx *) - ` mov edx, {emit_nativeint m}\n`; - ` mov eax, ecx\n`; - ` imul edx\n`; (* edx = high 32 bits of arg * m, signed *) - if m < 0n then - ` add edx, ecx\n`; - if p > 0 then - ` sar edx, {emit_int p}\n`; - ` mov eax, ecx\n`; - ` shr eax, 31\n`; - ` add edx, eax\n`; - (* Compute remainder via Euclid's equality *) - ` imul edx, {emit_int n}\n`; - ` sub ecx, edx\n` - end | Lop(Iintop_imm(op, n)) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n` diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml index eba767c29..d80d18208 100644 --- a/asmcomp/i386/proc.ml +++ b/asmcomp/i386/proc.ml @@ -162,9 +162,8 @@ let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *) let destroyed_at_oper = function Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs | Iop(Iextcall(_, false)) -> destroyed_at_c_call - | Iop(Iintop(Idiv | Imod)) - | Iop(Iintop_imm((Idiv | Imod), _)) -> [| eax; edx |] - | Iop(Ialloc _) -> [| eax |] + | Iop(Iintop(Idiv | Imod)) -> [| eax; edx |] + | Iop(Ialloc _ | Iintop Imulh) -> [| eax |] | Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| eax |] | Iop(Iintoffloat) -> [| eax |] | Iifthenelse(Ifloattest(_, _), _, _) -> [| eax |] diff --git a/asmcomp/i386/reload.ml b/asmcomp/i386/reload.ml index 623d12a84..bc1b08f59 100644 --- a/asmcomp/i386/reload.ml +++ b/asmcomp/i386/reload.ml @@ -57,9 +57,11 @@ method! reload_operation op arg res = if stackp arg.(0) then let r = self#makereg arg.(0) in ([|r|], [|r|]) else (arg, res) - | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat | - Ispecific(Ipush) -> + | Iintop(Imulh | Ilsl | Ilsr | Iasr) | Iintop_imm(_, _) + | Ifloatofint | Iintoffloat | Ispecific(Ipush) -> (* The argument(s) can be either in register or on stack *) + (* Note: Imulh: arg(0 and res(0) already forced in regs + Ilsl, Ilsr, Iasr: arg(1) already forced in regs *) (arg, res) | _ -> (* Other operations: all args and results in registers *) super#reload_operation op arg res diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml index ed2c367f0..473499f36 100644 --- a/asmcomp/i386/selection.ml +++ b/asmcomp/i386/selection.ml @@ -112,6 +112,10 @@ let pseudoregs_for_operation op arg res = (* Two-address unary operations *) | Iintop_imm((Iadd|Isub|Imul|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _) -> (res, res, false) + (* For imull, first arg must be in eax, eax is clobbered, and result is in + edx. *) + | Iintop(Imulh) -> + ([| eax; arg.(1) |], [| edx |], true) (* For shifts with variable shift count, second arg must be in ecx *) | Iintop(Ilsl|Ilsr|Iasr) -> ([|res.(0); ecx|], res, false) @@ -122,10 +126,6 @@ let pseudoregs_for_operation op arg res = ([| eax; ecx |], [| eax |], true) | Iintop(Imod) -> ([| eax; ecx |], [| edx |], true) - (* For div and mod with immediate operand, arg must not be in eax nor edx. - Keep it simple, force it in ecx. *) - | Iintop_imm((Idiv|Imod), _) -> - ([| ecx |], [| ecx |], true) (* For floating-point operations and floating-point loads, the result is always left at the top of the floating-point stack *) | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf @@ -202,19 +202,6 @@ method! select_operation op args = | (Iindexed2 0, _) -> super#select_operation op args | (addr, arg) -> (Ispecific(Ilea addr), [arg]) end - (* Recognize (x / cst) and (x % cst) only if cst is > 0. *) - | Cdivi -> - begin match args with - [arg1; Cconst_int n] when n > 0 -> - (Iintop_imm(Idiv, n), [arg1]) - | _ -> (Iintop Idiv, args) - end - | Cmodi -> - begin match args with - [arg1; Cconst_int n] when n > 0 -> - (Iintop_imm(Imod, n), [arg1]) - | _ -> (Iintop Imod, args) - end (* Recognize float arithmetic with memory. In passing, apply Ershov's algorithm to reduce stack usage *) | Caddf -> @@ -241,6 +228,9 @@ method! select_operation op args = | Cextcall(fn, ty_res, false, dbg) when !fast_math && List.mem fn inline_float_ops -> (Ispecific(Ifloatspecial fn), args) + (* i386 does not support immediate operands for multiply high signed *) + | Cmulhi -> + (Iintop Imulh, args) (* Default *) | _ -> super#select_operation op args diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml index 5cddd1676..58d0c1076 100644 --- a/asmcomp/mach.ml +++ b/asmcomp/mach.ml @@ -17,7 +17,7 @@ type integer_comparison = | Iunsigned of Cmm.comparison type integer_operation = - Iadd | Isub | Imul | Idiv | Imod + Iadd | Isub | Imul | Imulh | Idiv | Imod | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr | Icomp of integer_comparison | Icheckbound diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli index 837fd7d71..03028b2ca 100644 --- a/asmcomp/mach.mli +++ b/asmcomp/mach.mli @@ -17,7 +17,7 @@ type integer_comparison = | Iunsigned of Cmm.comparison type integer_operation = - Iadd | Isub | Imul | Idiv | Imod + Iadd | Isub | Imul | Imulh | Idiv | Imod | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr | Icomp of integer_comparison | Icheckbound diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp index e669c9005..e82f5ff79 100644 --- a/asmcomp/power/emit.mlp +++ b/asmcomp/power/emit.mlp @@ -266,15 +266,16 @@ let name_for_int_comparison = function (* Names for various instructions *) let name_for_intop = function - Iadd -> "add" - | Imul -> if ppc64 then "mulld" else "mullw" - | Idiv -> if ppc64 then "divd" else "divw" - | Iand -> "and" - | Ior -> "or" - | Ixor -> "xor" - | Ilsl -> if ppc64 then "sld" else "slw" - | Ilsr -> if ppc64 then "srd" else "srw" - | Iasr -> if ppc64 then "srad" else "sraw" + Iadd -> "add" + | Imul - > if ppc64 then "mulld" else "mullw" + | Imulh -> if ppc64 then "mulhd" else "mulhw" + | Idiv -> if ppc64 then "divd" else "divw" + | Iand -> "and" + | Ior -> "or" + | Ixor -> "xor" + | Ilsl -> if ppc64 then "sld" else "slw" + | Ilsr -> if ppc64 then "srd" else "srw" + | Iasr -> if ppc64 then "srad" else "sraw" | _ -> Misc.fatal_error "Emit.Intop" let name_for_intop_imm = function @@ -344,8 +345,6 @@ let instr_size = function | Lop(Iintop Imod) -> 3 | Lop(Iintop(Icomp cmp)) -> 4 | Lop(Iintop op) -> 1 - | Lop(Iintop_imm(Idiv, n)) -> 2 - | Lop(Iintop_imm(Imod, n)) -> 4 | Lop(Iintop_imm(Icomp cmp, n)) -> 4 | Lop(Iintop_imm(op, n)) -> 1 | Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1 @@ -605,16 +604,6 @@ let rec emit_instr i dslot = ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Iintop_imm(Isub, n)) -> ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` - | Lop(Iintop_imm(Idiv, n)) -> (* n is guaranteed to be a power of 2 *) - let l = Misc.log2 n in - ` {emit_string sragi} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`; - ` addze {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Imod, n)) -> (* n is guaranteed to be a power of 2 *) - let l = Misc.log2 n in - ` {emit_string sragi} {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`; - ` addze {emit_gpr 0}, {emit_gpr 0}\n`; - ` {emit_string slgi} {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`; - ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` | Lop(Iintop_imm(Icomp cmp, n)) -> begin match cmp with Isigned c -> diff --git a/asmcomp/power/scheduling.ml b/asmcomp/power/scheduling.ml index e4a575e0a..6e594f028 100644 --- a/asmcomp/power/scheduling.ml +++ b/asmcomp/power/scheduling.ml @@ -26,7 +26,7 @@ method oper_latency = function | Iload(_, _) -> 2 | Iconst_float _ -> 2 (* turned into a load *) | Iconst_symbol _ -> 1 - | Iintop Imul -> 9 + | Iintop(Imul | Imulh) -> 9 | Iintop_imm(Imul, _) -> 5 | Iintop(Idiv | Imod) -> 36 | Iaddf | Isubf -> 4 @@ -48,8 +48,6 @@ method oper_issue_cycles = function | Ialloc _ -> 4 | Iintop(Imod) -> 40 (* assuming full stall *) | Iintop(Icomp _) -> 4 - | Iintop_imm(Idiv, _) -> 2 - | Iintop_imm(Imod, _) -> 4 | Iintop_imm(Icomp _, _) -> 4 | Ifloatofint -> 9 | Iintoffloat -> 4 diff --git a/asmcomp/power/selection.ml b/asmcomp/power/selection.ml index a68c63fcc..86aea05f4 100644 --- a/asmcomp/power/selection.ml +++ b/asmcomp/power/selection.ml @@ -61,16 +61,8 @@ method select_addressing chunk exp = method! select_operation op args = match (op, args) with - (* Prevent the recognition of (x / cst) and (x % cst) when cst is not - a power of 2, which do not correspond to an instruction. *) - (Cdivi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) -> - (Iintop_imm(Idiv, n), [arg]) - | (Cdivi, _) -> - (Iintop Idiv, args) - | (Cmodi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) -> - (Iintop_imm(Imod, n), [arg]) - | (Cmodi, _) -> - (Iintop Imod, args) + (* PowerPC does not support immediate operands for multiply high *) + (Cmulhi, _) -> (Iintop Imulh, args) (* The and, or and xor instructions have a different range of immediate operands than the other instructions *) | (Cand, _) -> self#select_logical Iand args diff --git a/asmcomp/printcmm.ml b/asmcomp/printcmm.ml index b89937a29..f1c9243a0 100644 --- a/asmcomp/printcmm.ml +++ b/asmcomp/printcmm.ml @@ -60,6 +60,7 @@ let operation = function | Caddi -> "+" | Csubi -> "-" | Cmuli -> "*" + | Cmulhi -> "*h" | Cdivi -> "/" | Cmodi -> "mod" | Cand -> "and" diff --git a/asmcomp/printmach.ml b/asmcomp/printmach.ml index 395272d45..f260c3df7 100644 --- a/asmcomp/printmach.ml +++ b/asmcomp/printmach.ml @@ -70,6 +70,7 @@ let intop = function | Iadd -> " + " | Isub -> " - " | Imul -> " * " + | Imulh -> " *h " | Idiv -> " div " | Imod -> " mod " | Iand -> " & " diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml index 5e47efe51..8f1277a17 100644 --- a/asmcomp/selectgen.ml +++ b/asmcomp/selectgen.ml @@ -33,7 +33,7 @@ let oper_result_type = function end | Calloc -> typ_addr | Cstore c -> typ_void - | Caddi | Csubi | Cmuli | Cdivi | Cmodi | + | Caddi | Csubi | Cmuli | Cmulhi | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr | Ccmpi _ | Ccmpa _ | Ccmpf _ -> typ_int | Cadda | Csuba -> typ_addr @@ -231,19 +231,10 @@ method select_operation op args = | (Calloc, _) -> (Ialloc 0, args) | (Caddi, _) -> self#select_arith_comm Iadd args | (Csubi, _) -> self#select_arith Isub args - | (Cmuli, [arg1; Cconst_int n]) -> - let l = Misc.log2 n in - if n = 1 lsl l - then (Iintop_imm(Ilsl, l), [arg1]) - else self#select_arith_comm Imul args - | (Cmuli, [Cconst_int n; arg1]) -> - let l = Misc.log2 n in - if n = 1 lsl l - then (Iintop_imm(Ilsl, l), [arg1]) - else self#select_arith_comm Imul args | (Cmuli, _) -> self#select_arith_comm Imul args - | (Cdivi, _) -> self#select_arith Idiv args - | (Cmodi, _) -> self#select_arith_comm Imod args + | (Cmulhi, _) -> self#select_arith_comm Imulh args + | (Cdivi, _) -> (Iintop Idiv, args) + | (Cmodi, _) -> (Iintop Imod, args) | (Cand, _) -> self#select_arith_comm Iand args | (Cor, _) -> self#select_arith_comm Ior args | (Cxor, _) -> self#select_arith_comm Ixor args @@ -835,92 +826,3 @@ let is_tail_call nargs = let _ = Simplif.is_tail_native_heuristic := is_tail_call - -(* Turning integer divisions into multiply-high then shift. - The [division_parameters] function is used in module Emit for - those target platforms that support this optimization. *) - -(* Unsigned comparison between native integers. *) - -let ucompare x y = Nativeint.(compare (add x min_int) (add y min_int)) - -(* Unsigned division and modulus at type nativeint. - Algorithm: Hacker's Delight section 9.3 *) - -let udivmod n d = Nativeint.( - if d < 0n then - if ucompare n d < 0 then (0n, n) else (1n, sub n d) - else begin - let q = shift_left (div (shift_right_logical n 1) d) 1 in - let r = sub n (mul q d) in - if ucompare r d >= 0 then (succ q, sub r d) else (q, r) - end) - -(* Compute division parameters. - Algorithm: Hacker's Delight chapter 10, fig 10-1. *) - -let divimm_parameters d = Nativeint.( - assert (d > 0n); - let twopsm1 = min_int in (* 2^31 for 32-bit archs, 2^63 for 64-bit archs *) - let nc = sub (pred twopsm1) (snd (udivmod twopsm1 d)) in - let rec loop p (q1, r1) (q2, r2) = - let p = p + 1 in - let q1 = shift_left q1 1 and r1 = shift_left r1 1 in - let (q1, r1) = - if ucompare r1 nc >= 0 then (succ q1, sub r1 nc) else (q1, r1) in - let q2 = shift_left q2 1 and r2 = shift_left r2 1 in - let (q2, r2) = - if ucompare r2 d >= 0 then (succ q2, sub r2 d) else (q2, r2) in - let delta = sub d r2 in - if ucompare q1 delta < 0 || (q1 = delta && r1 = 0n) - then loop p (q1, r1) (q2, r2) - else (succ q2, p - size) - in loop (size - 1) (udivmod twopsm1 nc) (udivmod twopsm1 d)) - -(* The result [(m, p)] of [divimm_parameters d] satisfies the following - inequality: - - 2^(wordsize + p) < m * d <= 2^(wordsize + p) + 2^(p + 1) (i) - - from which it follows that - - floor(n / d) = floor(n * m / 2^(wordsize+p)) - if 0 <= n < 2^(wordsize-1) - ceil(n / d) = floor(n * m / 2^(wordsize+p)) + 1 - if -2^(wordsize-1) <= n < 0 - - The correctness condition (i) above can be checked by the code below. - It was exhaustively tested for values of d from 2 to 10^9 in the - wordsize = 64 case. - -let add2 (xh, xl) (yh, yl) = - let zl = add xl yl and zh = add xh yh in - ((if ucompare zl xl < 0 then succ zh else zh), zl) - -let shl2 (xh, xl) n = - assert (0 < n && n < size + size); - if n < size - then (logor (shift_left xh n) (shift_right_logical xl (size - n)), - shift_left xl n) - else (shift_left xl (n - size), 0n) - -let mul2 x y = - let halfsize = size / 2 in - let halfmask = pred (shift_left 1n halfsize) in - let xl = logand x halfmask and xh = shift_right_logical x halfsize in - let yl = logand y halfmask and yh = shift_right_logical y halfsize in - add2 (mul xh yh, 0n) - (add2 (shl2 (0n, mul xl yh) halfsize) - (add2 (shl2 (0n, mul xh yl) halfsize) - (0n, mul xl yl))) - -let ucompare2 (xh, xl) (yh, yl) = - let c = ucompare xh yh in if c = 0 then ucompare xl yl else c - -let validate d m p = - let md = mul2 m d in - let one2 = (0n, 1n) in - let twoszp = shl2 one2 (size + p) in - let twop1 = shl2 one2 (p + 1) in - ucompare2 twoszp md < 0 && ucompare2 md (add2 twoszp twop1) <= 0 -*) diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli index 6fa6b3e1f..11af7c1ff 100644 --- a/asmcomp/selectgen.mli +++ b/asmcomp/selectgen.mli @@ -76,5 +76,3 @@ class virtual selector_generic : object (Ident.t, Reg.t array) Tbl.t -> Cmm.expression -> Reg.t array option method emit_tail : (Ident.t, Reg.t array) Tbl.t -> Cmm.expression -> unit end - -val divimm_parameters: nativeint -> nativeint * int diff --git a/asmcomp/sparc/emit.mlp b/asmcomp/sparc/emit.mlp index 6c7e86da8..1d0699fb4 100644 --- a/asmcomp/sparc/emit.mlp +++ b/asmcomp/sparc/emit.mlp @@ -443,36 +443,15 @@ let rec emit_instr i dslot = ` sra {emit_reg i.arg.(0)}, 31, %g1\n`; ` wr %g1, %y\n`; ` sdiv {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` + | Lop(Iintop Imulh) -> + ` smul {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, %g1\n`; + ` rd %y, {emit_reg i.res.(0)}\n` | Lop(Iintop op) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Ilsl, 1)) -> (* UltraSPARC has two add units but only one shifter. *) ` add {emit_reg i.arg.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Idiv, n)) -> - let l = Misc.log2 n in - if n = 1 lsl l then begin - let lbl = new_label() in - ` cmp {emit_reg i.arg.(0)}, 0\n`; - ` bge {emit_label lbl}\n`; - ` mov {emit_reg i.arg.(0)}, %g1\n`; (* in delay slot *) - ` add %g1, {emit_int (n-1)}, %g1\n`; - `{emit_label lbl}:\n`; - ` sra %g1, {emit_int l}, {emit_reg i.res.(0)}\n` - end else begin - ` sra {emit_reg i.arg.(0)}, 31, %g1\n`; - ` wr %g1, %y\n`; - ` sdiv {emit_reg i.arg.(0)}, {emit_int n}, {emit_reg i.res.(0)}\n` - end - | Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *) - let lbl = new_label() in - ` tst {emit_reg i.arg.(0)}\n`; - ` bge {emit_label lbl}\n`; - ` andcc {emit_reg i.arg.(0)}, {emit_int (n-1)}, {emit_reg i.res.(0)}\n`; (* in delay slot *) - ` be {emit_label lbl}\n`; - ` nop\n`; - ` sub {emit_reg i.res.(0)}, {emit_int n}, {emit_reg i.res.(0)}\n`; - `{emit_label lbl}:\n` | Lop(Iintop_imm(Icomp cmp, n)) -> ` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`; if !arch_version = SPARC_V9 then begin @@ -496,6 +475,9 @@ let rec emit_instr i dslot = ` bleu {emit_label !range_check_trap}\n`; ` nop\n` (* delay slot *) end + | Lop(Iintop_imm(Imulh, n)) -> + ` smul {emit_reg i.arg.(0)}, {emit_int n}, %g1\n`; + ` rd %y, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(op, n)) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_int n}, {emit_reg i.res.(0)}\n` @@ -618,7 +600,7 @@ and fill_delay_slot = function that does not branch. *) let is_one_instr_op = function - Idiv | Imod | Icomp _ | Icheckbound -> false + Imulh | Idiv | Imod | Icomp _ | Icheckbound -> false | _ -> true let is_one_instr i = diff --git a/asmcomp/sparc/scheduling.ml b/asmcomp/sparc/scheduling.ml index 048880abd..497722bbc 100644 --- a/asmcomp/sparc/scheduling.ml +++ b/asmcomp/sparc/scheduling.ml @@ -47,8 +47,6 @@ method oper_issue_cycles = function | Ialloc _ -> 6 | Iintop(Icomp _) -> 4 | Iintop(Icheckbound) -> 2 - | Iintop_imm(Idiv, _) -> 5 - | Iintop_imm(Imod, _) -> 5 | Iintop_imm(Icomp _, _) -> 4 | Iintop_imm(Icheckbound, _) -> 2 | Inegf -> 2 diff --git a/asmcomp/sparc/selection.ml b/asmcomp/sparc/selection.ml index 055b78f19..d938c1eff 100644 --- a/asmcomp/sparc/selection.ml +++ b/asmcomp/sparc/selection.ml @@ -38,23 +38,13 @@ method select_addressing chunk = function method! select_operation op args = match (op, args) with (* For SPARC V7 multiplication, division and modulus are turned into - calls to C library routines, except if the dividend is a power of 2. + calls to C library routines. For SPARC V8 and V9, use hardware multiplication and division, but C library routine for modulus. *) - (Cmuli, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) -> - (Iintop_imm(Ilsl, Misc.log2 n), [arg]) - | (Cmuli, [Cconst_int n; arg]) when n = 1 lsl (Misc.log2 n) -> - (Iintop_imm(Ilsl, Misc.log2 n), [arg]) - | (Cmuli, _) when !arch_version = SPARC_V7 -> + (Cmuli, _) when !arch_version = SPARC_V7 -> (Iextcall(".umul", false), args) - | (Cdivi, [arg; Cconst_int n]) - when self#is_immediate n && n = 1 lsl (Misc.log2 n) -> - (Iintop_imm(Idiv, n), [arg]) | (Cdivi, _) when !arch_version = SPARC_V7 -> (Iextcall(".div", false), args) - | (Cmodi, [arg; Cconst_int n]) - when self#is_immediate n && n = 1 lsl (Misc.log2 n) -> - (Iintop_imm(Imod, n), [arg]) | (Cmodi, _) -> (Iextcall(".rem", false), args) | _ -> |