diff options
-rw-r--r-- | Changes | 3 | ||||
-rw-r--r-- | asmcomp/amd64/emit.mlp | 65 | ||||
-rw-r--r-- | asmcomp/amd64/selection.ml | 14 | ||||
-rw-r--r-- | asmcomp/selectgen.ml | 89 | ||||
-rw-r--r-- | asmcomp/selectgen.mli | 2 | ||||
-rw-r--r-- | testsuite/tests/basic/divint.ml | 112 | ||||
-rw-r--r-- | testsuite/tests/basic/divint.reference | 33 |
7 files changed, 297 insertions, 21 deletions
@@ -11,12 +11,15 @@ Language features: Compilers: - Experimental native code generator for AArch64 (ARM 64 bits) +- Optimization of integer division and modulus by constant divisors + (feature wish PR#6042) - PR#6182: better message for virtual objects and class types (Leo P. White, Stephen Dolan) - PR#5817: new flag to keep locations in cmi files - PR#5854: issue warning 3 when referring to a value marked with the [@@deprecated] attribute + Bug fixes: - PR#4719: Sys.executable_name wrong if executable name contains dots (Windows) - PR#4855: 'camlp4 -I +dir' accepted, dir is relative to 'camlp4 -where' diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index 8dad2206a..9f5836dd6 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -524,21 +524,60 @@ let emit_instr fallthrough i = | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> ` decq {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Idiv, n)) -> - (* Note: i.arg.(0) = i.res.(0) = rdx (cf. selection.ml) *) + (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *) let l = Misc.log2 n in - ` movq {emit_reg i.arg.(0)}, %rax\n`; - ` addq ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`; - ` testq %rax, %rax\n`; - ` cmovns %rax, {emit_reg i.arg.(0)}\n`; - ` sarq ${emit_int l}, {emit_reg i.res.(0)}\n` + if n = 1 lsl l then begin + ` movq {emit_reg i.arg.(0)}, %rax\n`; + ` addq ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`; + ` testq %rax, %rax\n`; + ` cmovns %rax, {emit_reg i.arg.(0)}\n`; + ` sarq ${emit_int l}, {emit_reg i.res.(0)}\n` + end else begin + let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in + (* Algorithm: + t = multiply-high-signed(arg, m) + if m < 0, t = t + m + t = shift-right-signed(t, p) + res = t + sign-bit(arg) + *) + ` movabsq ${emit_nativeint m}, %rdx\n`; + ` movq %rcx, %rax\n`; + ` imulq %rdx\n`; (* rdx = high 64 bits of arg * m, signed *) + if m < 0n then + ` addq %rcx, %rdx\n`; + if p > 0 then + ` sarq ${emit_int p}, %rdx\n`; + ` shrq $63, %rcx\n`; + ` addq %rdx, %rcx\n` + end | Lop(Iintop_imm(Imod, n)) -> - (* Note: i.arg.(0) = i.res.(0) = rdx (cf. selection.ml) *) - ` movq {emit_reg i.arg.(0)}, %rax\n`; - ` testq %rax, %rax\n`; - ` leaq {emit_int(n-1)}(%rax), %rax\n`; - ` cmovns {emit_reg i.arg.(0)}, %rax\n`; - ` andq ${emit_int (-n)}, %rax\n`; - ` subq %rax, {emit_reg i.res.(0)}\n` + (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *) + let l = Misc.log2 n in + if n = 1 lsl l then begin + ` movq {emit_reg i.arg.(0)}, %rax\n`; + ` testq %rax, %rax\n`; + ` leaq {emit_int(n-1)}(%rax), %rax\n`; (* flags preserved *) + ` cmovns {emit_reg i.arg.(0)}, %rax\n`; + ` andq ${emit_int (-n)}, %rax\n`; + ` subq %rax, {emit_reg i.res.(0)}\n` + end else begin + let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in + (* Compute quotient as in the Idiv immediate case, but + compute it in rdx, preserving rcx *) + ` movabsq ${emit_nativeint m}, %rdx\n`; + ` movq %rcx, %rax\n`; + ` imulq %rdx\n`; + if m < 0n then + ` addq %rcx, %rdx\n`; + if p > 0 then + ` sarq ${emit_int p}, %rdx\n`; + ` movq %rcx, %rax\n`; + ` shrq $63, %rax\n`; + ` addq %rax, %rdx\n`; + (* Compute remainder via Euclid's equality *) + ` imulq ${emit_int n}, %rdx\n`; + ` subq %rdx, %rcx\n` + end | Lop(Iintop_imm(op, n)) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n` diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml index 4de841287..97e0e84de 100644 --- a/asmcomp/amd64/selection.ml +++ b/asmcomp/amd64/selection.ml @@ -105,10 +105,10 @@ let pseudoregs_for_operation op arg res = ([| rax; rcx |], [| rax |]) | Iintop(Imod) -> ([| rax; rcx |], [| rdx |]) - (* For div and mod with immediate operand, arg must not be in rax. - Keep it simple, force it in rdx. *) + (* For div and mod with immediate operand, arg must not be in rax nor rdx. + Keep it simple, force it in rcx. *) | Iintop_imm((Idiv|Imod), _) -> - ([| rdx |], [| rdx |]) + ([| rcx |], [| rcx |]) (* Other instructions are regular *) | _ -> raise Use_default @@ -176,18 +176,16 @@ method! select_operation op args = | (Iindexed2 0, _) -> super#select_operation op args | (addr, arg) -> (Ispecific(Ilea addr), [arg]) end - (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *) + (* Recognize (x / cst) and (x % cst) only if cst is > 0. *) | Cdivi -> begin match args with - [arg1; Cconst_int n] when self#is_immediate n - && n = 1 lsl (Misc.log2 n) -> + [arg1; Cconst_int n] when self#is_immediate n && n > 0 -> (Iintop_imm(Idiv, n), [arg1]) | _ -> (Iintop Idiv, args) end | Cmodi -> begin match args with - [arg1; Cconst_int n] when self#is_immediate n - && n = 1 lsl (Misc.log2 n) -> + [arg1; Cconst_int n] when self#is_immediate n && n > 0 -> (Iintop_imm(Imod, n), [arg1]) | _ -> (Iintop Imod, args) end diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml index 1d2bf96d2..fc2e7821d 100644 --- a/asmcomp/selectgen.ml +++ b/asmcomp/selectgen.ml @@ -835,3 +835,92 @@ let is_tail_call nargs = let _ = Simplif.is_tail_native_heuristic := is_tail_call + +(* Turning integer divisions into multiply-high then shift. + The [division_parameters] function is used in module Emit for + those target platforms that support this optimization. *) + +(* Unsigned comparison between native integers. *) + +let ucompare x y = Nativeint.(compare (add x min_int) (add y min_int)) + +(* Unsigned division and modulus at type nativeint. + Algorithm: Hacker's Delight section 9.3 *) + +let udivmod n d = Nativeint.( + if d < 0n then + if ucompare n d < 0 then (0n, n) else (1n, sub n d) + else begin + let q = shift_left (div (shift_right_logical n 1) d) 1 in + let r = sub n (mul q d) in + if ucompare r d >= 0 then (succ q, sub r d) else (q, r) + end) + +(* Compute division parameters. + Algorithm: Hacker's Delight chapter 10, fig 10-1. *) + +let divimm_parameters d = Nativeint.( + assert (d > 0n); + let twopsm1 = min_int in (* 2^31 for 32-bit archs, 2^63 for 64-bit archs *) + let nc = sub (pred twopsm1) (snd (udivmod twopsm1 d)) in + let rec loop p (q1, r1) (q2, r2) = + let p = p + 1 in + let q1 = shift_left q1 1 and r1 = shift_left r1 1 in + let (q1, r1) = + if ucompare r1 nc >= 0 then (succ q1, sub r1 nc) else (q1, r1) in + let q2 = shift_left q2 1 and r2 = shift_left r2 1 in + let (q2, r2) = + if ucompare r2 d >= 0 then (succ q2, sub r2 d) else (q2, r2) in + let delta = sub d r2 in + if ucompare q1 delta < 0 || (q1 = delta && r1 = 0n) + then loop p (q1, r1) (q2, r2) + else (succ q2, p - size) + in loop (size - 1) (udivmod twopsm1 nc) (udivmod twopsm1 d)) + +(* The result [(m, p)] of [divimm_parameters d] satisfies the following + inequality: + + 2^(wordsize + p) < m * d <= 2^(wordsize + p) + 2^(p + 1) (i) + + from which it follows that + + floor(n / d) = floor(n * m / 2^(wordsize+p)) + if 0 <= n < 2^(wordsize-1) + ceil(n / d) = floor(n * m / 2^(wordsize+p)) + 1 + if -2^(wordsize-1) <= n < 0 + + The correctness condition (i) above can be checked by the code below. + It was exhaustively tested for values of d from 2 to 10^9 in the + wordsize = 64 case. + +let add2 (xh, xl) (yh, yl) = + let zl = add xl yl and zh = add xh yh in + ((if ucompare zl xl < 0 then succ zh else zh), zl) + +let shl2 (xh, xl) n = + assert (0 < n && n < size + size); + if n < size + then (logor (shift_left xh n) (shift_right_logical xl (size - n)), + shift_left xl n) + else (shift_left xl (n - size), 0n) + +let mul2 x y = + let halfsize = size / 2 in + let halfmask = pred (shift_left 1n halfsize) in + let xl = logand x halfmask and xh = shift_right_logical x halfsize in + let yl = logand y halfmask and yh = shift_right_logical y halfsize in + add2 (mul xh yh, 0n) + (add2 (shl2 (0n, mul xl yh) halfsize) + (add2 (shl2 (0n, mul xh yl) halfsize) + (0n, mul xl yl))) + +let ucompare2 (xh, xl) (yh, yl) = + let c = ucompare xh yh in if c = 0 then ucompare xl yl else c + +let validate d m p = + let md = mul2 m d in + let one2 = (0n, 1n) in + let twoszp = shl2 one2 (size + p) in + let twop1 = shl2 one2 (p + 1) in + ucompare2 twoszp md < 0 && ucompare2 md (add2 twoszp twop1) <= 0 +*) diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli index 11af7c1ff..6fa6b3e1f 100644 --- a/asmcomp/selectgen.mli +++ b/asmcomp/selectgen.mli @@ -76,3 +76,5 @@ class virtual selector_generic : object (Ident.t, Reg.t array) Tbl.t -> Cmm.expression -> Reg.t array option method emit_tail : (Ident.t, Reg.t array) Tbl.t -> Cmm.expression -> unit end + +val divimm_parameters: nativeint -> nativeint * int diff --git a/testsuite/tests/basic/divint.ml b/testsuite/tests/basic/divint.ml new file mode 100644 index 000000000..6dd4be3fc --- /dev/null +++ b/testsuite/tests/basic/divint.ml @@ -0,0 +1,112 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2013 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +open Printf + +(* Test integer division and modulus, esp. ocamlopt's optimization + when the divisor is a constant. *) + +let error = ref false + +module WithInt = struct + +let d = ref 0 +let divref n = n / !d +let modref n = n mod !d + +let test_one (df: int -> int) (mf: int -> int) x = + if not (df x = divref x && mf x = modref x) then begin + printf "mismatch for %d\n" x; + error := true + end + +let do_test divisor (df: int -> int) (mf: int -> int) = + d := divisor; + List.iter (test_one df mf) + [0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; + 100; 1000; 10000; 100000; 1000000; max_int - 1; max_int; + -1; -2; -3; -4; -5; -6; -7; -8; -9; -10; + -100; -1000; -10000; -100000; -1000000; min_int + 1; min_int]; + let seed = ref 0 in + for i = 1 to 1000 do + seed := !seed * 69069 + 25173; + test_one df mf !seed + done + +end + +module WithNat = struct + +let d = ref 0n +let divref n = Nativeint.div n !d +let modref n = Nativeint.rem n !d + +let test_one (df: nativeint -> nativeint) (mf: nativeint -> nativeint) x = + if not (df x = divref x && mf x = modref x) then begin + printf "mismatch for %nd\n" x; + error := true + end + +let do_test divisor (df: nativeint -> nativeint) (mf: nativeint -> nativeint) = + d := Nativeint.of_int divisor; + List.iter (test_one df mf) + [0n; 1n; 2n; 3n; 4n; 5n; 6n; 7n; 8n; 9n; 10n; + 100n; 1000n; 10000n; 100000n; 1000000n; + Nativeint.(pred max_int); Nativeint.max_int; + -1n; -2n; -3n; -4n; -5n; -6n; -7n; -8n; -9n; -10n; + -100n; -1000n; -10000n; -100000n; -1000000n; + Nativeint.(succ min_int); Nativeint.min_int]; + let seed = ref 0n in + for i = 1 to 1000 do + seed := Nativeint.(add (mul !seed 69069n) 25173n); + test_one df mf !seed + done + +end + +let _ = + printf "2 int\n"; WithInt.do_test 2 (fun x -> x / 2)(fun x -> x mod 2); + printf "3 int\n"; WithInt.do_test 3 (fun x -> x / 3)(fun x -> x mod 3); + printf "4 int\n"; WithInt.do_test 4 (fun x -> x / 4)(fun x -> x mod 4); + printf "5 int\n"; WithInt.do_test 5 (fun x -> x / 5)(fun x -> x mod 5); + printf "6 int\n"; WithInt.do_test 6 (fun x -> x / 6)(fun x -> x mod 6); + printf "7 int\n"; WithInt.do_test 7 (fun x -> x / 7)(fun x -> x mod 7); + printf "9 int\n"; WithInt.do_test 9 (fun x -> x / 9)(fun x -> x mod 9); + printf "10 int\n"; WithInt.do_test 10 (fun x -> x / 10)(fun x -> x mod 10); + printf "11 int\n"; WithInt.do_test 11 (fun x -> x / 11)(fun x -> x mod 11); + printf "12 int\n"; WithInt.do_test 12 (fun x -> x / 12)(fun x -> x mod 12); + printf "25 int\n"; WithInt.do_test 25 (fun x -> x / 25)(fun x -> x mod 25); + printf "55 int\n"; WithInt.do_test 55 (fun x -> x / 55)(fun x -> x mod 55); + printf "125 int\n"; WithInt.do_test 125 (fun x -> x / 125)(fun x -> x mod 125); + printf "625 int\n"; WithInt.do_test 625 (fun x -> x / 625)(fun x -> x mod 625); + printf "-2 int\n"; WithInt.do_test (-2) (fun x -> x / (-2))(fun x -> x mod (-2)); + printf "-3 int\n"; WithInt.do_test (-3) (fun x -> x / (-3))(fun x -> x mod (-3)); + + printf "2 nat\n"; WithNat.do_test 2 (fun x -> Nativeint.div x 2n)(fun x -> Nativeint.rem x 2n); + printf "3 nat\n"; WithNat.do_test 3 (fun x -> Nativeint.div x 3n)(fun x -> Nativeint.rem x 3n); + printf "4 nat\n"; WithNat.do_test 4 (fun x -> Nativeint.div x 4n)(fun x -> Nativeint.rem x 4n); + printf "5 nat\n"; WithNat.do_test 5 (fun x -> Nativeint.div x 5n)(fun x -> Nativeint.rem x 5n); + printf "6 nat\n"; WithNat.do_test 6 (fun x -> Nativeint.div x 6n)(fun x -> Nativeint.rem x 6n); + printf "7 nat\n"; WithNat.do_test 7 (fun x -> Nativeint.div x 7n)(fun x -> Nativeint.rem x 7n); + printf "9 nat\n"; WithNat.do_test 9 (fun x -> Nativeint.div x 9n)(fun x -> Nativeint.rem x 9n); + printf "10 nat\n"; WithNat.do_test 10 (fun x -> Nativeint.div x 10n)(fun x -> Nativeint.rem x 10n); + printf "11 nat\n"; WithNat.do_test 11 (fun x -> Nativeint.div x 11n)(fun x -> Nativeint.rem x 11n); + printf "12 nat\n"; WithNat.do_test 12 (fun x -> Nativeint.div x 12n)(fun x -> Nativeint.rem x 12n); + printf "25 nat\n"; WithNat.do_test 25 (fun x -> Nativeint.div x 25n)(fun x -> Nativeint.rem x 25n); + printf "55 nat\n"; WithNat.do_test 55 (fun x -> Nativeint.div x 55n)(fun x -> Nativeint.rem x 55n); + printf "125 nat\n"; WithNat.do_test 125 (fun x -> Nativeint.div x 125n)(fun x -> Nativeint.rem x 125n); + printf "625 nat\n"; WithNat.do_test 625 (fun x -> Nativeint.div x 625n)(fun x -> Nativeint.rem x 625n); + printf "-2 nat\n"; WithNat.do_test (-2) (fun x -> Nativeint.div x (-2n))(fun x -> Nativeint.rem x (-2n)); + printf "-3 nat\n"; WithNat.do_test (-3) (fun x -> Nativeint.div x (-3n))(fun x -> Nativeint.rem x (-3n)); + + if !error then printf "TEST FAILED.\n" else printf "Test passed.\n" + diff --git a/testsuite/tests/basic/divint.reference b/testsuite/tests/basic/divint.reference new file mode 100644 index 000000000..4aa1e2110 --- /dev/null +++ b/testsuite/tests/basic/divint.reference @@ -0,0 +1,33 @@ +2 int +3 int +4 int +5 int +6 int +7 int +9 int +10 int +11 int +12 int +25 int +55 int +125 int +625 int +-2 int +-3 int +2 nat +3 nat +4 nat +5 nat +6 nat +7 nat +9 nat +10 nat +11 nat +12 nat +25 nat +55 nat +125 nat +625 nat +-2 nat +-3 nat +Test passed. |