summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Changes3
-rw-r--r--asmcomp/amd64/emit.mlp65
-rw-r--r--asmcomp/amd64/selection.ml14
-rw-r--r--asmcomp/selectgen.ml89
-rw-r--r--asmcomp/selectgen.mli2
-rw-r--r--testsuite/tests/basic/divint.ml112
-rw-r--r--testsuite/tests/basic/divint.reference33
7 files changed, 297 insertions, 21 deletions
diff --git a/Changes b/Changes
index 005126cfc..5f8320e30 100644
--- a/Changes
+++ b/Changes
@@ -11,12 +11,15 @@ Language features:
Compilers:
- Experimental native code generator for AArch64 (ARM 64 bits)
+- Optimization of integer division and modulus by constant divisors
+ (feature wish PR#6042)
- PR#6182: better message for virtual objects and class types
(Leo P. White, Stephen Dolan)
- PR#5817: new flag to keep locations in cmi files
- PR#5854: issue warning 3 when referring to a value marked with
the [@@deprecated] attribute
+
Bug fixes:
- PR#4719: Sys.executable_name wrong if executable name contains dots (Windows)
- PR#4855: 'camlp4 -I +dir' accepted, dir is relative to 'camlp4 -where'
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index 8dad2206a..9f5836dd6 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -524,21 +524,60 @@ let emit_instr fallthrough i =
| Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
` decq {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Idiv, n)) ->
- (* Note: i.arg.(0) = i.res.(0) = rdx (cf. selection.ml) *)
+ (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *)
let l = Misc.log2 n in
- ` movq {emit_reg i.arg.(0)}, %rax\n`;
- ` addq ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`;
- ` testq %rax, %rax\n`;
- ` cmovns %rax, {emit_reg i.arg.(0)}\n`;
- ` sarq ${emit_int l}, {emit_reg i.res.(0)}\n`
+ if n = 1 lsl l then begin
+ ` movq {emit_reg i.arg.(0)}, %rax\n`;
+ ` addq ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`;
+ ` testq %rax, %rax\n`;
+ ` cmovns %rax, {emit_reg i.arg.(0)}\n`;
+ ` sarq ${emit_int l}, {emit_reg i.res.(0)}\n`
+ end else begin
+ let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in
+ (* Algorithm:
+ t = multiply-high-signed(arg, m)
+ if m < 0, t = t + m
+ t = shift-right-signed(t, p)
+ res = t + sign-bit(arg)
+ *)
+ ` movabsq ${emit_nativeint m}, %rdx\n`;
+ ` movq %rcx, %rax\n`;
+ ` imulq %rdx\n`; (* rdx = high 64 bits of arg * m, signed *)
+ if m < 0n then
+ ` addq %rcx, %rdx\n`;
+ if p > 0 then
+ ` sarq ${emit_int p}, %rdx\n`;
+ ` shrq $63, %rcx\n`;
+ ` addq %rdx, %rcx\n`
+ end
| Lop(Iintop_imm(Imod, n)) ->
- (* Note: i.arg.(0) = i.res.(0) = rdx (cf. selection.ml) *)
- ` movq {emit_reg i.arg.(0)}, %rax\n`;
- ` testq %rax, %rax\n`;
- ` leaq {emit_int(n-1)}(%rax), %rax\n`;
- ` cmovns {emit_reg i.arg.(0)}, %rax\n`;
- ` andq ${emit_int (-n)}, %rax\n`;
- ` subq %rax, {emit_reg i.res.(0)}\n`
+ (* Note: i.arg.(0) = i.res.(0) = rcx (cf. selection.ml) *)
+ let l = Misc.log2 n in
+ if n = 1 lsl l then begin
+ ` movq {emit_reg i.arg.(0)}, %rax\n`;
+ ` testq %rax, %rax\n`;
+ ` leaq {emit_int(n-1)}(%rax), %rax\n`; (* flags preserved *)
+ ` cmovns {emit_reg i.arg.(0)}, %rax\n`;
+ ` andq ${emit_int (-n)}, %rax\n`;
+ ` subq %rax, {emit_reg i.res.(0)}\n`
+ end else begin
+ let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in
+ (* Compute quotient as in the Idiv immediate case, but
+ compute it in rdx, preserving rcx *)
+ ` movabsq ${emit_nativeint m}, %rdx\n`;
+ ` movq %rcx, %rax\n`;
+ ` imulq %rdx\n`;
+ if m < 0n then
+ ` addq %rcx, %rdx\n`;
+ if p > 0 then
+ ` sarq ${emit_int p}, %rdx\n`;
+ ` movq %rcx, %rax\n`;
+ ` shrq $63, %rax\n`;
+ ` addq %rax, %rdx\n`;
+ (* Compute remainder via Euclid's equality *)
+ ` imulq ${emit_int n}, %rdx\n`;
+ ` subq %rdx, %rcx\n`
+ end
| Lop(Iintop_imm(op, n)) ->
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n`
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 4de841287..97e0e84de 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -105,10 +105,10 @@ let pseudoregs_for_operation op arg res =
([| rax; rcx |], [| rax |])
| Iintop(Imod) ->
([| rax; rcx |], [| rdx |])
- (* For div and mod with immediate operand, arg must not be in rax.
- Keep it simple, force it in rdx. *)
+ (* For div and mod with immediate operand, arg must not be in rax nor rdx.
+ Keep it simple, force it in rcx. *)
| Iintop_imm((Idiv|Imod), _) ->
- ([| rdx |], [| rdx |])
+ ([| rcx |], [| rcx |])
(* Other instructions are regular *)
| _ -> raise Use_default
@@ -176,18 +176,16 @@ method! select_operation op args =
| (Iindexed2 0, _) -> super#select_operation op args
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
end
- (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *)
+ (* Recognize (x / cst) and (x % cst) only if cst is > 0. *)
| Cdivi ->
begin match args with
- [arg1; Cconst_int n] when self#is_immediate n
- && n = 1 lsl (Misc.log2 n) ->
+ [arg1; Cconst_int n] when self#is_immediate n && n > 0 ->
(Iintop_imm(Idiv, n), [arg1])
| _ -> (Iintop Idiv, args)
end
| Cmodi ->
begin match args with
- [arg1; Cconst_int n] when self#is_immediate n
- && n = 1 lsl (Misc.log2 n) ->
+ [arg1; Cconst_int n] when self#is_immediate n && n > 0 ->
(Iintop_imm(Imod, n), [arg1])
| _ -> (Iintop Imod, args)
end
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
index 1d2bf96d2..fc2e7821d 100644
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@@ -835,3 +835,92 @@ let is_tail_call nargs =
let _ =
Simplif.is_tail_native_heuristic := is_tail_call
+
+(* Turning integer divisions into multiply-high then shift.
+ The [division_parameters] function is used in module Emit for
+ those target platforms that support this optimization. *)
+
+(* Unsigned comparison between native integers. *)
+
+let ucompare x y = Nativeint.(compare (add x min_int) (add y min_int))
+
+(* Unsigned division and modulus at type nativeint.
+ Algorithm: Hacker's Delight section 9.3 *)
+
+let udivmod n d = Nativeint.(
+ if d < 0n then
+ if ucompare n d < 0 then (0n, n) else (1n, sub n d)
+ else begin
+ let q = shift_left (div (shift_right_logical n 1) d) 1 in
+ let r = sub n (mul q d) in
+ if ucompare r d >= 0 then (succ q, sub r d) else (q, r)
+ end)
+
+(* Compute division parameters.
+ Algorithm: Hacker's Delight chapter 10, fig 10-1. *)
+
+let divimm_parameters d = Nativeint.(
+ assert (d > 0n);
+ let twopsm1 = min_int in (* 2^31 for 32-bit archs, 2^63 for 64-bit archs *)
+ let nc = sub (pred twopsm1) (snd (udivmod twopsm1 d)) in
+ let rec loop p (q1, r1) (q2, r2) =
+ let p = p + 1 in
+ let q1 = shift_left q1 1 and r1 = shift_left r1 1 in
+ let (q1, r1) =
+ if ucompare r1 nc >= 0 then (succ q1, sub r1 nc) else (q1, r1) in
+ let q2 = shift_left q2 1 and r2 = shift_left r2 1 in
+ let (q2, r2) =
+ if ucompare r2 d >= 0 then (succ q2, sub r2 d) else (q2, r2) in
+ let delta = sub d r2 in
+ if ucompare q1 delta < 0 || (q1 = delta && r1 = 0n)
+ then loop p (q1, r1) (q2, r2)
+ else (succ q2, p - size)
+ in loop (size - 1) (udivmod twopsm1 nc) (udivmod twopsm1 d))
+
+(* The result [(m, p)] of [divimm_parameters d] satisfies the following
+ inequality:
+
+ 2^(wordsize + p) < m * d <= 2^(wordsize + p) + 2^(p + 1) (i)
+
+ from which it follows that
+
+ floor(n / d) = floor(n * m / 2^(wordsize+p))
+ if 0 <= n < 2^(wordsize-1)
+ ceil(n / d) = floor(n * m / 2^(wordsize+p)) + 1
+ if -2^(wordsize-1) <= n < 0
+
+ The correctness condition (i) above can be checked by the code below.
+ It was exhaustively tested for values of d from 2 to 10^9 in the
+ wordsize = 64 case.
+
+let add2 (xh, xl) (yh, yl) =
+ let zl = add xl yl and zh = add xh yh in
+ ((if ucompare zl xl < 0 then succ zh else zh), zl)
+
+let shl2 (xh, xl) n =
+ assert (0 < n && n < size + size);
+ if n < size
+ then (logor (shift_left xh n) (shift_right_logical xl (size - n)),
+ shift_left xl n)
+ else (shift_left xl (n - size), 0n)
+
+let mul2 x y =
+ let halfsize = size / 2 in
+ let halfmask = pred (shift_left 1n halfsize) in
+ let xl = logand x halfmask and xh = shift_right_logical x halfsize in
+ let yl = logand y halfmask and yh = shift_right_logical y halfsize in
+ add2 (mul xh yh, 0n)
+ (add2 (shl2 (0n, mul xl yh) halfsize)
+ (add2 (shl2 (0n, mul xh yl) halfsize)
+ (0n, mul xl yl)))
+
+let ucompare2 (xh, xl) (yh, yl) =
+ let c = ucompare xh yh in if c = 0 then ucompare xl yl else c
+
+let validate d m p =
+ let md = mul2 m d in
+ let one2 = (0n, 1n) in
+ let twoszp = shl2 one2 (size + p) in
+ let twop1 = shl2 one2 (p + 1) in
+ ucompare2 twoszp md < 0 && ucompare2 md (add2 twoszp twop1) <= 0
+*)
diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli
index 11af7c1ff..6fa6b3e1f 100644
--- a/asmcomp/selectgen.mli
+++ b/asmcomp/selectgen.mli
@@ -76,3 +76,5 @@ class virtual selector_generic : object
(Ident.t, Reg.t array) Tbl.t -> Cmm.expression -> Reg.t array option
method emit_tail : (Ident.t, Reg.t array) Tbl.t -> Cmm.expression -> unit
end
+
+val divimm_parameters: nativeint -> nativeint * int
diff --git a/testsuite/tests/basic/divint.ml b/testsuite/tests/basic/divint.ml
new file mode 100644
index 000000000..6dd4be3fc
--- /dev/null
+++ b/testsuite/tests/basic/divint.ml
@@ -0,0 +1,112 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2013 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+open Printf
+
+(* Test integer division and modulus, esp. ocamlopt's optimization
+ when the divisor is a constant. *)
+
+let error = ref false
+
+module WithInt = struct
+
+let d = ref 0
+let divref n = n / !d
+let modref n = n mod !d
+
+let test_one (df: int -> int) (mf: int -> int) x =
+ if not (df x = divref x && mf x = modref x) then begin
+ printf "mismatch for %d\n" x;
+ error := true
+ end
+
+let do_test divisor (df: int -> int) (mf: int -> int) =
+ d := divisor;
+ List.iter (test_one df mf)
+ [0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10;
+ 100; 1000; 10000; 100000; 1000000; max_int - 1; max_int;
+ -1; -2; -3; -4; -5; -6; -7; -8; -9; -10;
+ -100; -1000; -10000; -100000; -1000000; min_int + 1; min_int];
+ let seed = ref 0 in
+ for i = 1 to 1000 do
+ seed := !seed * 69069 + 25173;
+ test_one df mf !seed
+ done
+
+end
+
+module WithNat = struct
+
+let d = ref 0n
+let divref n = Nativeint.div n !d
+let modref n = Nativeint.rem n !d
+
+let test_one (df: nativeint -> nativeint) (mf: nativeint -> nativeint) x =
+ if not (df x = divref x && mf x = modref x) then begin
+ printf "mismatch for %nd\n" x;
+ error := true
+ end
+
+let do_test divisor (df: nativeint -> nativeint) (mf: nativeint -> nativeint) =
+ d := Nativeint.of_int divisor;
+ List.iter (test_one df mf)
+ [0n; 1n; 2n; 3n; 4n; 5n; 6n; 7n; 8n; 9n; 10n;
+ 100n; 1000n; 10000n; 100000n; 1000000n;
+ Nativeint.(pred max_int); Nativeint.max_int;
+ -1n; -2n; -3n; -4n; -5n; -6n; -7n; -8n; -9n; -10n;
+ -100n; -1000n; -10000n; -100000n; -1000000n;
+ Nativeint.(succ min_int); Nativeint.min_int];
+ let seed = ref 0n in
+ for i = 1 to 1000 do
+ seed := Nativeint.(add (mul !seed 69069n) 25173n);
+ test_one df mf !seed
+ done
+
+end
+
+let _ =
+ printf "2 int\n"; WithInt.do_test 2 (fun x -> x / 2)(fun x -> x mod 2);
+ printf "3 int\n"; WithInt.do_test 3 (fun x -> x / 3)(fun x -> x mod 3);
+ printf "4 int\n"; WithInt.do_test 4 (fun x -> x / 4)(fun x -> x mod 4);
+ printf "5 int\n"; WithInt.do_test 5 (fun x -> x / 5)(fun x -> x mod 5);
+ printf "6 int\n"; WithInt.do_test 6 (fun x -> x / 6)(fun x -> x mod 6);
+ printf "7 int\n"; WithInt.do_test 7 (fun x -> x / 7)(fun x -> x mod 7);
+ printf "9 int\n"; WithInt.do_test 9 (fun x -> x / 9)(fun x -> x mod 9);
+ printf "10 int\n"; WithInt.do_test 10 (fun x -> x / 10)(fun x -> x mod 10);
+ printf "11 int\n"; WithInt.do_test 11 (fun x -> x / 11)(fun x -> x mod 11);
+ printf "12 int\n"; WithInt.do_test 12 (fun x -> x / 12)(fun x -> x mod 12);
+ printf "25 int\n"; WithInt.do_test 25 (fun x -> x / 25)(fun x -> x mod 25);
+ printf "55 int\n"; WithInt.do_test 55 (fun x -> x / 55)(fun x -> x mod 55);
+ printf "125 int\n"; WithInt.do_test 125 (fun x -> x / 125)(fun x -> x mod 125);
+ printf "625 int\n"; WithInt.do_test 625 (fun x -> x / 625)(fun x -> x mod 625);
+ printf "-2 int\n"; WithInt.do_test (-2) (fun x -> x / (-2))(fun x -> x mod (-2));
+ printf "-3 int\n"; WithInt.do_test (-3) (fun x -> x / (-3))(fun x -> x mod (-3));
+
+ printf "2 nat\n"; WithNat.do_test 2 (fun x -> Nativeint.div x 2n)(fun x -> Nativeint.rem x 2n);
+ printf "3 nat\n"; WithNat.do_test 3 (fun x -> Nativeint.div x 3n)(fun x -> Nativeint.rem x 3n);
+ printf "4 nat\n"; WithNat.do_test 4 (fun x -> Nativeint.div x 4n)(fun x -> Nativeint.rem x 4n);
+ printf "5 nat\n"; WithNat.do_test 5 (fun x -> Nativeint.div x 5n)(fun x -> Nativeint.rem x 5n);
+ printf "6 nat\n"; WithNat.do_test 6 (fun x -> Nativeint.div x 6n)(fun x -> Nativeint.rem x 6n);
+ printf "7 nat\n"; WithNat.do_test 7 (fun x -> Nativeint.div x 7n)(fun x -> Nativeint.rem x 7n);
+ printf "9 nat\n"; WithNat.do_test 9 (fun x -> Nativeint.div x 9n)(fun x -> Nativeint.rem x 9n);
+ printf "10 nat\n"; WithNat.do_test 10 (fun x -> Nativeint.div x 10n)(fun x -> Nativeint.rem x 10n);
+ printf "11 nat\n"; WithNat.do_test 11 (fun x -> Nativeint.div x 11n)(fun x -> Nativeint.rem x 11n);
+ printf "12 nat\n"; WithNat.do_test 12 (fun x -> Nativeint.div x 12n)(fun x -> Nativeint.rem x 12n);
+ printf "25 nat\n"; WithNat.do_test 25 (fun x -> Nativeint.div x 25n)(fun x -> Nativeint.rem x 25n);
+ printf "55 nat\n"; WithNat.do_test 55 (fun x -> Nativeint.div x 55n)(fun x -> Nativeint.rem x 55n);
+ printf "125 nat\n"; WithNat.do_test 125 (fun x -> Nativeint.div x 125n)(fun x -> Nativeint.rem x 125n);
+ printf "625 nat\n"; WithNat.do_test 625 (fun x -> Nativeint.div x 625n)(fun x -> Nativeint.rem x 625n);
+ printf "-2 nat\n"; WithNat.do_test (-2) (fun x -> Nativeint.div x (-2n))(fun x -> Nativeint.rem x (-2n));
+ printf "-3 nat\n"; WithNat.do_test (-3) (fun x -> Nativeint.div x (-3n))(fun x -> Nativeint.rem x (-3n));
+
+ if !error then printf "TEST FAILED.\n" else printf "Test passed.\n"
+
diff --git a/testsuite/tests/basic/divint.reference b/testsuite/tests/basic/divint.reference
new file mode 100644
index 000000000..4aa1e2110
--- /dev/null
+++ b/testsuite/tests/basic/divint.reference
@@ -0,0 +1,33 @@
+2 int
+3 int
+4 int
+5 int
+6 int
+7 int
+9 int
+10 int
+11 int
+12 int
+25 int
+55 int
+125 int
+625 int
+-2 int
+-3 int
+2 nat
+3 nat
+4 nat
+5 nat
+6 nat
+7 nat
+9 nat
+10 nat
+11 nat
+12 nat
+25 nat
+55 nat
+125 nat
+625 nat
+-2 nat
+-3 nat
+Test passed.