summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@inria.fr>2014-04-26 10:40:22 +0000
committerXavier Leroy <xavier.leroy@inria.fr>2014-04-26 10:40:22 +0000
commit558f40e3446854913d5ce011441c4b10da03f27e (patch)
tree84c78a2d3098937813daae9bd75d328dc0669840
parent95d98cd9782c0577b0c7290f6535b29e7bd4cd41 (diff)
New back-end optimization pass: common subexpression elimination (CSE).
(Reuses results of previous computations instead of recomputing them.) (Cherry-picked from branch backend-optim.) Tested on amd64/linux and i386/linux. Other back-ends compile (after assorted updates) but are untested. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14688 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r--.depend9
-rw-r--r--Changes2
-rw-r--r--Makefile12
-rw-r--r--asmcomp/CSEgen.ml258
-rw-r--r--asmcomp/CSEgen.mli38
-rw-r--r--asmcomp/amd64/CSE.ml36
-rw-r--r--asmcomp/amd64/arch.ml16
-rw-r--r--asmcomp/amd64/emit.mlp6
-rw-r--r--asmcomp/amd64/emit_nt.mlp6
-rw-r--r--asmcomp/amd64/proc.ml4
-rw-r--r--asmcomp/amd64/selection.ml14
-rw-r--r--asmcomp/arm/CSE.ml38
-rw-r--r--asmcomp/arm/emit.mlp19
-rw-r--r--asmcomp/arm/proc.ml7
-rw-r--r--asmcomp/arm64/CSE.ml38
-rw-r--r--asmcomp/arm64/emit.mlp6
-rw-r--r--asmcomp/arm64/proc.ml14
-rw-r--r--asmcomp/asmgen.ml2
-rw-r--r--asmcomp/i386/CSE.ml48
-rw-r--r--asmcomp/i386/arch.ml17
-rw-r--r--asmcomp/i386/emit.mlp6
-rw-r--r--asmcomp/i386/emit_nt.mlp6
-rw-r--r--asmcomp/i386/selection.ml16
-rw-r--r--asmcomp/mach.ml2
-rw-r--r--asmcomp/mach.mli5
-rw-r--r--asmcomp/power/CSE.ml38
-rw-r--r--asmcomp/power/emit.mlp4
-rw-r--r--asmcomp/power/scheduling.ml2
-rw-r--r--asmcomp/printmach.ml5
-rw-r--r--asmcomp/schedgen.ml2
-rw-r--r--asmcomp/selectgen.ml14
-rw-r--r--asmcomp/selectgen.mli3
-rw-r--r--asmcomp/sparc/CSE.ml31
-rw-r--r--asmcomp/sparc/emit.mlp4
-rw-r--r--driver/main_args.ml8
-rw-r--r--driver/main_args.mli2
-rw-r--r--driver/optmain.ml1
-rw-r--r--tools/ocamloptp.ml1
-rw-r--r--utils/clflags.ml1
-rw-r--r--utils/clflags.mli1
40 files changed, 660 insertions, 82 deletions
diff --git a/.depend b/.depend
index 9162031fa..1d36a9c89 100644
--- a/.depend
+++ b/.depend
@@ -576,6 +576,7 @@ bytecomp/typeopt.cmo : typing/types.cmi typing/typedtree.cmi \
bytecomp/typeopt.cmx : typing/types.cmx typing/typedtree.cmx \
typing/predef.cmx typing/path.cmx bytecomp/lambda.cmx typing/ident.cmx \
typing/env.cmx typing/ctype.cmx bytecomp/typeopt.cmi
+asmcomp/CSEgen.cmi : asmcomp/mach.cmi
asmcomp/asmgen.cmi : bytecomp/lambda.cmi asmcomp/cmm.cmi
asmcomp/asmlibrarian.cmi :
asmcomp/asmlink.cmi : asmcomp/cmx_format.cmi
@@ -618,6 +619,10 @@ asmcomp/selection.cmi : asmcomp/mach.cmi asmcomp/cmm.cmi
asmcomp/spill.cmi : asmcomp/mach.cmi
asmcomp/split.cmi : asmcomp/mach.cmi
asmcomp/strmatch.cmi : asmcomp/cmm.cmi
+asmcomp/CSE.cmo : asmcomp/mach.cmi asmcomp/CSEgen.cmi asmcomp/arch.cmo
+asmcomp/CSE.cmx : asmcomp/mach.cmx asmcomp/CSEgen.cmx asmcomp/arch.cmx
+asmcomp/CSEgen.cmo : asmcomp/reg.cmi asmcomp/mach.cmi asmcomp/CSEgen.cmi
+asmcomp/CSEgen.cmx : asmcomp/reg.cmx asmcomp/mach.cmx asmcomp/CSEgen.cmi
asmcomp/arch.cmo :
asmcomp/arch.cmx :
asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
@@ -629,7 +634,7 @@ asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
asmcomp/emitaux.cmi asmcomp/emit.cmi asmcomp/deadcode.cmi \
utils/config.cmi asmcomp/compilenv.cmi asmcomp/comballoc.cmi \
asmcomp/coloring.cmi asmcomp/cmmgen.cmi asmcomp/cmm.cmi \
- asmcomp/closure.cmi utils/clflags.cmi asmcomp/asmgen.cmi
+ asmcomp/closure.cmi utils/clflags.cmi asmcomp/CSE.cmo asmcomp/asmgen.cmi
asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
asmcomp/spill.cmx asmcomp/selection.cmx asmcomp/scheduling.cmx \
asmcomp/reload.cmx asmcomp/reg.cmx asmcomp/proc.cmx asmcomp/printmach.cmx \
@@ -639,7 +644,7 @@ asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
asmcomp/emitaux.cmx asmcomp/emit.cmx asmcomp/deadcode.cmx \
utils/config.cmx asmcomp/compilenv.cmx asmcomp/comballoc.cmx \
asmcomp/coloring.cmx asmcomp/cmmgen.cmx asmcomp/cmm.cmx \
- asmcomp/closure.cmx utils/clflags.cmx asmcomp/asmgen.cmi
+ asmcomp/closure.cmx utils/clflags.cmx asmcomp/CSE.cmx asmcomp/asmgen.cmi
asmcomp/asmlibrarian.cmo : utils/misc.cmi parsing/location.cmi \
utils/config.cmi asmcomp/compilenv.cmi asmcomp/cmx_format.cmi \
utils/clflags.cmi asmcomp/clambda.cmi utils/ccomp.cmi asmcomp/asmlink.cmi \
diff --git a/Changes b/Changes
index 3f32d5dee..0e4c42009 100644
--- a/Changes
+++ b/Changes
@@ -39,6 +39,8 @@ Compilers:
int32/int64/nativeint arithmetic. Constant propagation for floats
can be turned off with option -no-float-const-prop, for codes that
change FP rounding modes at run-time.
+- New back-end optimization pass: common subexpression elimination (CSE).
+ (Reuses results of previous computations instead of recomputing them.)
- New back-end optimization pass: dead code elimination.
(Removes arithmetic and load instructions whose results are unused.)
- PR#6269 Optimization of string matching (patch by Benoit Vaugon
diff --git a/Makefile b/Makefile
index 594c650e7..877df08ef 100644
--- a/Makefile
+++ b/Makefile
@@ -84,7 +84,9 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \
asmcomp/closure.cmo asmcomp/strmatch.cmo asmcomp/cmmgen.cmo \
asmcomp/printmach.cmo asmcomp/selectgen.cmo asmcomp/selection.cmo \
- asmcomp/comballoc.cmo asmcomp/liveness.cmo \
+ asmcomp/comballoc.cmo \
+ asmcomp/CSEgen.cmo asmcomp/CSE.cmo \
+ asmcomp/liveness.cmo \
asmcomp/spill.cmo asmcomp/split.cmo \
asmcomp/interf.cmo asmcomp/coloring.cmo \
asmcomp/reloadgen.cmo asmcomp/reload.cmo \
@@ -589,6 +591,14 @@ partialclean::
beforedepend:: asmcomp/selection.ml
+asmcomp/CSE.ml: asmcomp/$(ARCH)/CSE.ml
+ ln -s $(ARCH)/CSE.ml asmcomp/CSE.ml
+
+partialclean::
+ rm -f asmcomp/CSE.ml
+
+beforedepend:: asmcomp/CSE.ml
+
asmcomp/reload.ml: asmcomp/$(ARCH)/reload.ml
ln -s $(ARCH)/reload.ml asmcomp/reload.ml
diff --git a/asmcomp/CSEgen.ml b/asmcomp/CSEgen.ml
new file mode 100644
index 000000000..1cbef266b
--- /dev/null
+++ b/asmcomp/CSEgen.ml
@@ -0,0 +1,258 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* Common subexpression elimination by value numbering over extended
+ basic blocks. *)
+
+open Mach
+
+type valnum = int
+
+(* We maintain sets of equations of the form
+ valnums = operation(valnums)
+ plus a mapping from registers to value numbers. *)
+
+type rhs = operation * valnum array
+
+module Equations =
+ Map.Make(struct type t = rhs let compare = Pervasives.compare end)
+
+type numbering =
+ { num_next: int; (* next fresh value number *)
+ num_eqs: valnum array Equations.t; (* mapping rhs -> valnums *)
+ num_reg: valnum Reg.Map.t } (* mapping register -> valnum *)
+
+let empty_numbering =
+ { num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty }
+
+(** [valnum_reg n r] returns the value number for the contents of
+ register [r]. If none exists, a fresh value number is returned
+ and associated with register [r]. The possibly updated numbering
+ is also returned. [valnum_regs] is similar, but for an array of
+ registers. *)
+
+let valnum_reg n r =
+ try
+ (n, Reg.Map.find r n.num_reg)
+ with Not_found ->
+ let v = n.num_next in
+ ({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v)
+
+let valnum_regs n rs =
+ let l = Array.length rs in
+ let vs = Array.make l 0 in
+ let n = ref n in
+ for i = 0 to l-1 do
+ let (ni, vi) = valnum_reg !n rs.(i) in
+ vs.(i) <- vi;
+ n := ni
+ done;
+ (!n, vs)
+
+(* Look up the set of equations for an equation with the given rhs.
+ Return [Some res] if there is one, where [res] is the lhs. *)
+
+let find_equation n rhs =
+ try
+ Some(Equations.find rhs n.num_eqs)
+ with Not_found ->
+ None
+
+(* Find a set of registers containing the given value numbers. *)
+
+let find_regs_containing n vs =
+ match Array.length vs with
+ | 0 -> Some [||]
+ | 1 -> let v = vs.(0) in
+ Reg.Map.fold (fun r v' res -> if v' = v then Some [|r|] else res)
+ n.num_reg None
+ | _ -> assert false
+
+(* Associate the given value numbers to the given result registers,
+ without adding new equations. *)
+
+let set_known_regs n rs vs =
+ match Array.length rs with
+ | 0 -> n
+ | 1 -> { n with num_reg = Reg.Map.add rs.(0) vs.(0) n.num_reg }
+ | _ -> assert false
+
+(* Record the effect of a move: no new equations, but the result reg
+ maps to the same value number as the argument reg. *)
+
+let set_move n src dst =
+ let (n1, v) = valnum_reg n src in
+ { n1 with num_reg = Reg.Map.add dst v n1.num_reg }
+
+(* Record the equation [fresh valnums = rhs] and associate the given
+ result registers [rs] to [fresh valnums]. *)
+
+let set_fresh_regs n rs rhs =
+ match Array.length rs with
+ | 0 -> { n with num_eqs = Equations.add rhs [||] n.num_eqs }
+ | 1 -> let v = n.num_next in
+ { num_next = v + 1;
+ num_eqs = Equations.add rhs [|v|] n.num_eqs;
+ num_reg = Reg.Map.add rs.(0) v n.num_reg }
+ | _ -> assert false
+
+(* Forget everything we know about the given result registers,
+ which are receiving unpredictable values at run-time. *)
+
+let set_unknown_regs n rs =
+ { n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg }
+
+(* Keep only the equations satisfying the given predicate. *)
+
+let filter_equations pred n =
+ { n with num_eqs = Equations.filter (fun (op,_) res -> pred op) n.num_eqs }
+
+(* Prepend a reg-reg move *)
+
+let insert_move srcs dsts i =
+ match Array.length srcs with
+ | 0 -> i
+ | 1 -> instr_cons (Iop Imove) srcs dsts i
+ | _ -> assert false
+
+(* Classification of operations *)
+
+type op_class =
+ | Op_pure (* pure, produce one result *)
+ | Op_checkbound (* checkbound-style: no result, can raise an exn *)
+ | Op_load (* memory load *)
+ | Op_store of bool (* memory store, false = init, true = assign *)
+ | Op_other (* anything else that does not store in memory *)
+
+class cse_generic = object (self)
+
+(* Default classification of operations. Can be overriden in
+ processor-specific files to classify specific operations better. *)
+
+method class_of_operation op =
+ match op with
+ | Imove | Ispill | Ireload -> assert false (* treated specially *)
+ | Iconst_int _ | Iconst_float _ | Iconst_symbol _
+ | Iconst_blockheader _ -> Op_pure
+ | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+ | Iextcall _ -> assert false (* treated specially *)
+ | Istackoffset _ -> Op_other
+ | Iload(_,_) -> Op_load
+ | Istore(_,_,asg) -> Op_store asg
+ | Ialloc _ -> Op_other
+ | Iintop(Icheckbound) -> Op_checkbound
+ | Iintop _ -> Op_pure
+ | Iintop_imm(Icheckbound, _) -> Op_checkbound
+ | Iintop_imm(_, _) -> Op_pure
+ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
+ | Ifloatofint | Iintoffloat -> Op_pure
+ | Ispecific _ -> Op_other
+
+(* Operations that are so cheap that it isn't worth factoring them. *)
+
+method is_cheap_operation op =
+ match op with
+ | Iconst_int _ | Iconst_blockheader _ -> true
+ | _ -> false
+
+(* Forget all equations involving memory loads. Performed after a
+ non-initializing store *)
+
+method private kill_loads n =
+ filter_equations (fun o -> self#class_of_operation o <> Op_load) n
+
+(* Keep only equations involving checkbounds, and forget register values.
+ Performed across a call. *)
+
+method private keep_checkbounds n =
+ filter_equations (fun o -> self#class_of_operation o = Op_checkbound)
+ {n with num_reg = Reg.Map.empty }
+
+(* Perform CSE on the given instruction [i] and its successors.
+ [n] is the value numbering current at the beginning of [i]. *)
+
+method private cse n i =
+ match i.desc with
+ | Iend | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _)
+ | Iexit _ | Iraise _ ->
+ i
+ | Iop (Imove | Ispill | Ireload) ->
+ (* For moves, we associate the same value number to the result reg
+ as to the argument reg. *)
+ let n1 = set_move n i.arg.(0) i.res.(0) in
+ {i with next = self#cse n1 i.next}
+ | Iop (Icall_ind | Icall_imm _ | Iextcall _) ->
+ (* We don't perform CSE across function calls, as it increases
+ register pressure too much. We do remember the checkbound
+ instructions already performed, though, since their reuse
+ cannot increase register pressure. *)
+ let n1 = self#keep_checkbounds n in
+ {i with next = self#cse n1 i.next}
+ | Iop op ->
+ begin match self#class_of_operation op with
+ | Op_pure | Op_checkbound | Op_load ->
+ assert (Array.length i.res <= 1);
+ let (n1, varg) = valnum_regs n i.arg in
+ begin match find_equation n1 (op, varg) with
+ | Some vres ->
+ (* This operation was computed earlier. *)
+ let n2 = set_known_regs n1 i.res vres in
+ begin match find_regs_containing n1 vres with
+ | Some res when not (self#is_cheap_operation op) ->
+ (* We can replace res <- op args with r <- move res.
+ If the operation is very cheap to compute, e.g.
+ an integer constant, don't bother. *)
+ insert_move res i.res (self#cse n2 i.next)
+ | _ ->
+ {i with next = self#cse n2 i.next}
+ end
+ | None ->
+ (* This operation produces a result we haven't seen earlier. *)
+ let n2 = set_fresh_regs n1 i.res (op, varg) in
+ {i with next = self#cse n2 i.next}
+ end
+ | Op_store false | Op_other ->
+ (* An initializing store or an "other" operation do not invalidate
+ any equations, but we do not know anything about the results. *)
+ let n1 = set_unknown_regs n i.res in
+ {i with next = self#cse n1 i.next}
+ | Op_store true ->
+ (* A non-initializing store: it can invalidate
+ anything we know about prior loads. *)
+ let n1 = set_unknown_regs (self#kill_loads n) i.res in
+ {i with next = self#cse n1 i.next}
+ end
+ (* For control structures, we set the numbering to empty at every
+ join point, but propagate the current numbering across fork points. *)
+ | Iifthenelse(test, ifso, ifnot) ->
+ {i with desc = Iifthenelse(test, self#cse n ifso, self#cse n ifnot);
+ next = self#cse empty_numbering i.next}
+ | Iswitch(index, cases) ->
+ {i with desc = Iswitch(index, Array.map (self#cse n) cases);
+ next = self#cse empty_numbering i.next}
+ | Iloop(body) ->
+ {i with desc = Iloop(self#cse empty_numbering body);
+ next = self#cse empty_numbering i.next}
+ | Icatch(nfail, body, handler) ->
+ {i with desc = Icatch(nfail, self#cse n body, self#cse empty_numbering handler);
+ next = self#cse empty_numbering i.next}
+ | Itrywith(body, handler) ->
+ {i with desc = Itrywith(self#cse n body, self#cse empty_numbering handler);
+ next = self#cse empty_numbering i.next}
+
+method fundecl f =
+ {f with fun_body = self#cse empty_numbering f.fun_body}
+
+end
+
+
+
diff --git a/asmcomp/CSEgen.mli b/asmcomp/CSEgen.mli
new file mode 100644
index 000000000..c19855eca
--- /dev/null
+++ b/asmcomp/CSEgen.mli
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* Common subexpression elimination by value numbering over extended
+ basic blocks. *)
+
+type op_class =
+ | Op_pure (* pure, produce one result *)
+ | Op_checkbound (* checkbound-style: no result, can raise an exn *)
+ | Op_load (* memory load *)
+ | Op_store of bool (* memory store, false = init, true = assign *)
+ | Op_other (* anything else that does not store in memory *)
+
+class cse_generic : object
+ (* The following methods can be overriden to handle processor-specific
+ operations. *)
+
+ method class_of_operation: Mach.operation -> op_class
+
+ method is_cheap_operation: Mach.operation -> bool
+ (* Operations that are so cheap that it isn't worth factoring them. *)
+
+ (* The following method is the entry point and should not be overridden *)
+ method fundecl: Mach.fundecl -> Mach.fundecl
+
+end
+
+
+
diff --git a/asmcomp/amd64/CSE.ml b/asmcomp/amd64/CSE.ml
new file mode 100644
index 000000000..63ef08853
--- /dev/null
+++ b/asmcomp/amd64/CSE.ml
@@ -0,0 +1,36 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* CSE for the AMD64 *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+ match op with
+ | Ispecific(Ilea _) -> Op_pure
+ | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg
+ | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg
+ | Ispecific(Ioffset_loc(_, _)) -> Op_store true
+ | Ispecific(Ifloatarithmem _) -> Op_load
+ | _ -> super#class_of_operation op
+
+end
+
+let fundecl f =
+ (new cse)#fundecl f
+
diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml
index b0a5ffb8b..3741dd74b 100644
--- a/asmcomp/amd64/arch.ml
+++ b/asmcomp/amd64/arch.ml
@@ -33,8 +33,8 @@ type addressing_mode =
type specific_operation =
Ilea of addressing_mode (* "lea" gives scaled adds *)
- | Istore_int of nativeint * addressing_mode (* Store an integer constant *)
- | Istore_symbol of string * addressing_mode (* Store a symbol *)
+ | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *)
+ | Istore_symbol of string * addressing_mode * bool (* Store a symbol *)
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
| Ifloatarithmem of float_operation * addressing_mode
(* Float arith operation with memory *)
@@ -101,10 +101,14 @@ let print_addressing printreg addr ppf arg =
let print_specific_operation printreg op ppf arg =
match op with
| Ilea addr -> print_addressing printreg addr ppf arg
- | Istore_int(n, addr) ->
- fprintf ppf "[%a] := %nd" (print_addressing printreg addr) arg n
- | Istore_symbol(lbl, addr) ->
- fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
+ | Istore_int(n, addr, is_assign) ->
+ fprintf ppf "[%a] := %nd %s"
+ (print_addressing printreg addr) arg n
+ (if is_assign then "(assign)" else "(init)")
+ | Istore_symbol(lbl, addr, is_assign) ->
+ fprintf ppf "[%a] := \"%s\" %s"
+ (print_addressing printreg addr) arg lbl
+ (if is_assign then "(assign)" else "(init)")
| Ioffset_loc(n, addr) ->
fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
| Isqrtf ->
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index bea7e9331..b576ece98 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -449,7 +449,7 @@ let emit_instr fallthrough i =
| Double | Double_u ->
` movsd {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
end
- | Lop(Istore(chunk, addr)) ->
+ | Lop(Istore(chunk, addr, _)) ->
begin match chunk with
| Word ->
` movq {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
@@ -542,9 +542,9 @@ let emit_instr fallthrough i =
` cvttsd2siq {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
| Lop(Ispecific(Ilea addr)) ->
` leaq {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
- | Lop(Ispecific(Istore_int(n, addr))) ->
+ | Lop(Ispecific(Istore_int(n, addr, _))) ->
` movq ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
- | Lop(Ispecific(Istore_symbol(s, addr))) ->
+ | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
assert (not !pic_code && not !Clflags.dlcode);
` movq ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp
index cc158d889..a66f0c93b 100644
--- a/asmcomp/amd64/emit_nt.mlp
+++ b/asmcomp/amd64/emit_nt.mlp
@@ -443,7 +443,7 @@ let emit_instr fallthrough i =
| Double | Double_u ->
` movsd {emit_reg dest}, REAL8 PTR {emit_addressing addr i.arg 0}\n`
end
- | Lop(Istore(chunk, addr)) ->
+ | Lop(Istore(chunk, addr, _)) ->
begin match chunk with
| Word ->
` mov QWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
@@ -532,9 +532,9 @@ let emit_instr fallthrough i =
` cvttsd2si {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop(Ispecific(Ilea addr)) ->
` lea {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
- | Lop(Ispecific(Istore_int(n, addr))) ->
+ | Lop(Ispecific(Istore_int(n, addr, _))) ->
` mov QWORD PTR {emit_addressing addr i.arg 0}, {emit_nativeint n}\n`
- | Lop(Ispecific(Istore_symbol(s, addr))) ->
+ | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
assert (not !pic_code);
add_used_symbol s;
` mov QWORD PTR {emit_addressing addr i.arg 0}, OFFSET {emit_symbol s}\n`
diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml
index 5d448df36..cd06559e1 100644
--- a/asmcomp/amd64/proc.ml
+++ b/asmcomp/amd64/proc.ml
@@ -259,7 +259,7 @@ let destroyed_at_oper = function
| Iop(Iextcall(_, false)) -> destroyed_at_c_call
| Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _))
-> [| rax; rdx |]
- | Iop(Istore(Single, _)) -> [| rxmm15 |]
+ | Iop(Istore(Single, _, _)) -> [| rxmm15 |]
| Iop(Ialloc _ | Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _))
-> [| rax |]
| Iswitch(_, _) -> [| rax; rdx |]
@@ -290,7 +290,7 @@ let max_register_pressure = function
if fp then [| 10; 16 |] else [| 11; 16 |]
| Ialloc _ | Iintop(Icomp _) | Iintop_imm((Icomp _), _) ->
if fp then [| 11; 16 |] else [| 12; 16 |]
- | Istore(Single, _) ->
+ | Istore(Single, _, _) ->
if fp then [| 12; 15 |] else [| 13; 15 |]
| _ -> if fp then [| 12; 16 |] else [| 13; 16 |]
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 5e6afbcab..fa7fe66c0 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -152,20 +152,20 @@ method select_addressing chunk exp =
| Ascaledadd(e1, e2, scale) ->
(Iindexed2scaled(scale, d), Ctuple[e1; e2])
-method! select_store addr exp =
+method! select_store is_assign addr exp =
match exp with
Cconst_int n when self#is_immediate n ->
- (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+ (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
| (Cconst_natint n | Cconst_blockheader n) when self#is_immediate_natint n ->
- (Ispecific(Istore_int(n, addr)), Ctuple [])
+ (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
| Cconst_pointer n when self#is_immediate n ->
- (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+ (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
| Cconst_natpointer n when self#is_immediate_natint n ->
- (Ispecific(Istore_int(n, addr)), Ctuple [])
+ (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
| Cconst_symbol s when not (!pic_code || !Clflags.dlcode) ->
- (Ispecific(Istore_symbol(s, addr)), Ctuple [])
+ (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple [])
| _ ->
- super#select_store addr exp
+ super#select_store is_assign addr exp
method! select_operation op args =
match op with
diff --git a/asmcomp/arm/CSE.ml b/asmcomp/arm/CSE.ml
new file mode 100644
index 000000000..00282f1f5
--- /dev/null
+++ b/asmcomp/arm/CSE.ml
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* CSE for ARM *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+ match op with
+ | Ispecific(Ishiftcheckbound _) -> Op_checkbound
+ | Ispecific _ -> Op_pure
+ | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+ match op with
+ | Iconst_int n | Iconst_blockheader n -> n <= 255n && n >= 0n
+ | _ -> false
+
+end
+
+let fundecl f =
+ (new cse)#fundecl f
+
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
index 18c873de2..61035b85f 100644
--- a/asmcomp/arm/emit.mlp
+++ b/asmcomp/arm/emit.mlp
@@ -283,7 +283,7 @@ let num_literals = ref 0
(* Label a floating-point literal *)
let float_literal f =
- let repr = Int64.bits_of_float cst in
+ let repr = Int64.bits_of_float f in
try
List.assoc repr !float_literals
with Not_found ->
@@ -391,8 +391,7 @@ let emit_instr i =
| Lop(Iconst_int n | Iconst_blockheader n) ->
emit_intconst i.res.(0) (Nativeint.to_int32 n)
| Lop(Iconst_float f) when !fpu = Soft ->
- ` @ {emit_string f}\n`;
- let bits = Int64.bits_of_float (float_of_string f) in
+ let bits = Int64.bits_of_float f in
let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32)
and low_bits = Int64.to_int32 bits in
if is_immediate low_bits || is_immediate high_bits then begin
@@ -407,7 +406,7 @@ let emit_instr i =
end
| Lop(Iconst_float f) when !fpu = VFPv2 ->
let lbl = float_literal f in
- ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`;
+ ` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`;
1
| Lop(Iconst_float f) ->
let encode imm =
@@ -426,12 +425,12 @@ let emit_instr i =
let ex = ((ex + 3) land 0x07) lxor 0x04 in
Some((sg lsl 7) lor (ex lsl 4) lor mn)
end in
- begin match encode (Int64.bits_of_float (float_of_string f)) with
+ begin match encode (Int64.bits_of_float f) with
None ->
let lbl = float_literal f in
- ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`
+ ` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`
| Some imm8 ->
- ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n`
+ ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8}\n`
end; 1
| Lop(Iconst_symbol s) ->
emit_load_symbol_addr i.res.(0) s
@@ -509,10 +508,10 @@ let emit_instr i =
| Double_u -> "fldd"
| _ (* 32-bit quantities *) -> "ldr" in
` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
- | Lop(Istore(Single, addr)) when !fpu >= VFPv2 ->
+ | Lop(Istore(Single, addr, _)) when !fpu >= VFPv2 ->
` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
- | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
+ | Lop(Istore((Double | Double_u), addr, _)) when !fpu = Soft ->
(* Use STM or STRD if possible *)
begin match i.arg.(0), i.arg.(1), addr with
{loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
@@ -526,7 +525,7 @@ let emit_instr i =
` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
end
- | Lop(Istore(size, addr)) ->
+ | Lop(Istore(size, addr, _)) ->
let r = i.arg.(0) in
let instr =
match size with
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
index c49e3f087..a5bf3d5c8 100644
--- a/asmcomp/arm/proc.ml
+++ b/asmcomp/arm/proc.ml
@@ -203,7 +203,7 @@ let destroyed_at_oper = function
[| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *)
| Iop(Iintop Imulh) when !arch < ARMv6 ->
[| phys_reg 8 |] (* r12 destroyed *)
- | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
+ | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
[| phys_reg 107 |] (* d7 (s14-s15) destroyed *)
| _ -> [||]
@@ -222,7 +222,7 @@ let max_register_pressure = function
| Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
| Iconst_symbol _ when !pic_code -> [| 7; 16; 32 |]
| Iintoffloat | Ifloatofint
- | Iload(Single, _) | Istore(Single, _) -> [| 9; 15; 31 |]
+ | Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
| _ -> [| 9; 16; 32 |]
(* Pure operations (without any side effect besides updating their result
@@ -231,7 +231,8 @@ let max_register_pressure = function
let op_is_pure = function
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
- | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false
+ | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _)
+ | Ispecific(Ishiftcheckbound _) -> false
| _ -> true
(* Layout of the stack *)
diff --git a/asmcomp/arm64/CSE.ml b/asmcomp/arm64/CSE.ml
new file mode 100644
index 000000000..359e57eb5
--- /dev/null
+++ b/asmcomp/arm64/CSE.ml
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* CSE for ARM64 *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+ match op with
+ | Ispecific(Ishiftcheckbound _) -> Op_checkbound
+ | Ispecific _ -> Op_pure
+ | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+ match op with
+ | Iconst_int n | Iconst_blockheader n -> n <= 65535n && n >= 0n
+ | _ -> false
+
+end
+
+let fundecl f =
+ (new cse)#fundecl f
+
diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp
index eaab3ef48..2c2454fde 100644
--- a/asmcomp/arm64/emit.mlp
+++ b/asmcomp/arm64/emit.mlp
@@ -328,9 +328,9 @@ let emit_instr i =
| Lop(Iconst_float f) ->
let b = Int64.bits_of_float f in
if b = 0L then
- ` fmov {emit_reg i.res.(0)}, xzr /* {emit_string f} */\n`
+ ` fmov {emit_reg i.res.(0)}, xzr\n`
else if is_immediate_float b then
- ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b} /* {emit_string f} */\n`
+ ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b}\n`
else begin
let lbl = float_literal b in
` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`;
@@ -388,7 +388,7 @@ let emit_instr i =
| Word | Double | Double_u ->
` ldr {emit_reg dst}, {emit_addressing addr base}\n`
end
- | Lop(Istore(size, addr)) ->
+ | Lop(Istore(size, addr, _)) ->
let src = i.arg.(0) in
let base =
match addr with
diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml
index b52c2fd8a..d2cda5c23 100644
--- a/asmcomp/arm64/proc.ml
+++ b/asmcomp/arm64/proc.ml
@@ -177,7 +177,7 @@ let destroyed_at_oper = function
destroyed_at_c_call
| Iop(Ialloc _) ->
[| reg_x15 |]
- | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
+ | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
[| reg_d7 |] (* d7 / s7 destroyed *)
| _ -> [||]
@@ -194,9 +194,19 @@ let max_register_pressure = function
| Iextcall(_, _) -> [| 10; 8 |]
| Ialloc _ -> [| 25; 32 |]
| Iintoffloat | Ifloatofint
- | Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |]
+ | Iload(Single, _) | Istore(Single, _, _) -> [| 26; 31 |]
| _ -> [| 26; 32 |]
+(* Pure operations (without any side effect besides updating their result
+ registers). *)
+
+let op_is_pure = function
+ | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+ | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
+ | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _)
+ | Ispecific(Ishiftcheckbound _) -> false
+ | _ -> true
+
(* Layout of the stack *)
let num_stack_slots = [| 0; 0 |]
diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml
index ab71d51a0..311bb029b 100644
--- a/asmcomp/asmgen.ml
+++ b/asmcomp/asmgen.ml
@@ -64,6 +64,8 @@ let compile_fundecl (ppf : formatter) fd_cmm =
++ pass_dump_if ppf dump_selection "After instruction selection"
++ Comballoc.fundecl
++ pass_dump_if ppf dump_combine "After allocation combining"
+ ++ CSE.fundecl
+ ++ pass_dump_if ppf dump_cse "After CSE"
++ liveness ppf
++ Deadcode.fundecl
++ pass_dump_if ppf dump_live "Liveness analysis"
diff --git a/asmcomp/i386/CSE.ml b/asmcomp/i386/CSE.ml
new file mode 100644
index 000000000..3ce456702
--- /dev/null
+++ b/asmcomp/i386/CSE.ml
@@ -0,0 +1,48 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* CSE for the i386 *)
+
+open Cmm
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+ match op with
+ (* Operations that affect the floating-point stack cannot be factored *)
+ | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
+ | Iintoffloat | Ifloatofint
+ | Iload((Single | Double | Double_u), _) -> Op_other
+ (* Specific ops *)
+ | Ispecific(Ilea _) -> Op_pure
+ | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg
+ | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg
+ | Ispecific(Ioffset_loc(_, _)) -> Op_store true
+ | Ispecific _ -> Op_other
+ | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+ match op with
+ | Iconst_int _ | Iconst_blockheader _ -> true
+ | Iconst_symbol _ -> true
+ | _ -> false
+
+end
+
+let fundecl f =
+ (new cse)#fundecl f
+
diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml
index d2f9fd61a..0d2130445 100644
--- a/asmcomp/i386/arch.ml
+++ b/asmcomp/i386/arch.ml
@@ -31,8 +31,8 @@ type addressing_mode =
type specific_operation =
Ilea of addressing_mode (* Lea gives scaled adds *)
- | Istore_int of nativeint * addressing_mode (* Store an integer constant *)
- | Istore_symbol of string * addressing_mode (* Store a symbol *)
+ | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *)
+ | Istore_symbol of string * addressing_mode * bool (* Store a symbol *)
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
| Ipush (* Push regs on stack *)
| Ipush_int of nativeint (* Push an integer constant *)
@@ -105,11 +105,14 @@ let print_addressing printreg addr ppf arg =
let print_specific_operation printreg op ppf arg =
match op with
| Ilea addr -> print_addressing printreg addr ppf arg
- | Istore_int(n, addr) ->
- fprintf ppf "[%a] := %s" (print_addressing printreg addr) arg
- (Nativeint.to_string n)
- | Istore_symbol(lbl, addr) ->
- fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
+ | Istore_int(n, addr, is_assign) ->
+ fprintf ppf "[%a] := %nd %s"
+ (print_addressing printreg addr) arg n
+ (if is_assign then "(assign)" else "(init)")
+ | Istore_symbol(lbl, addr, is_assign) ->
+ fprintf ppf "[%a] := \"%s\" %s"
+ (print_addressing printreg addr) arg lbl
+ (if is_assign then "(assign)" else "(init)")
| Ioffset_loc(n, addr) ->
fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
| Ipush ->
diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp
index 532c43dfe..98df5f958 100644
--- a/asmcomp/i386/emit.mlp
+++ b/asmcomp/i386/emit.mlp
@@ -544,7 +544,7 @@ let emit_instr fallthrough i =
| Double | Double_u ->
` fldl {emit_addressing addr i.arg 0}\n`
end
- | Lop(Istore(chunk, addr)) ->
+ | Lop(Istore(chunk, addr, _)) ->
begin match chunk with
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
` movl {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
@@ -684,9 +684,9 @@ let emit_instr fallthrough i =
stack_offset := !stack_offset + 8
| Lop(Ispecific(Ilea addr)) ->
` lea {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
- | Lop(Ispecific(Istore_int(n, addr))) ->
+ | Lop(Ispecific(Istore_int(n, addr, _))) ->
` movl ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
- | Lop(Ispecific(Istore_symbol(s, addr))) ->
+ | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
` movl ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
` addl ${emit_int n}, {emit_addressing addr i.arg 0}\n`
diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp
index 5d030ec07..a9c9db3e4 100644
--- a/asmcomp/i386/emit_nt.mlp
+++ b/asmcomp/i386/emit_nt.mlp
@@ -480,7 +480,7 @@ let emit_instr i =
| Double | Double_u ->
` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`
end
- | Lop(Istore(chunk, addr)) ->
+ | Lop(Istore(chunk, addr, _)) ->
begin match chunk with
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
` mov DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
@@ -618,9 +618,9 @@ let emit_instr i =
stack_offset := !stack_offset + 8
| Lop(Ispecific(Ilea addr)) ->
` lea {emit_reg i.res.(0)}, DWORD PTR {emit_addressing addr i.arg 0}\n`
- | Lop(Ispecific(Istore_int(n, addr))) ->
+ | Lop(Ispecific(Istore_int(n, addr, _))) ->
` mov DWORD PTR {emit_addressing addr i.arg 0},{emit_nativeint n}\n`
- | Lop(Ispecific(Istore_symbol(s, addr))) ->
+ | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
add_used_symbol s ;
` mov DWORD PTR {emit_addressing addr i.arg 0},OFFSET {emit_symbol s}\n`
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
index d86f1b282..10d2d40e3 100644
--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@@ -135,7 +135,7 @@ let pseudoregs_for_operation op arg res =
(* For storing a byte, the argument must be in eax...edx.
(But for a short, any reg will do!)
Keep it simple, just force the argument to be in edx. *)
- | Istore((Byte_unsigned | Byte_signed), addr) ->
+ | Istore((Byte_unsigned | Byte_signed), addr, _) ->
let newarg = Array.copy arg in
newarg.(0) <- edx;
(newarg, res, false)
@@ -178,20 +178,20 @@ method select_addressing chunk exp =
| (Ascaledadd(e1, e2, scale), d) ->
(Iindexed2scaled(scale, d), Ctuple[e1; e2])
-method! select_store addr exp =
+method! select_store is_assign addr exp =
match exp with
Cconst_int n ->
- (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+ (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
| (Cconst_natint n | Cconst_blockheader n) ->
- (Ispecific(Istore_int(n, addr)), Ctuple [])
+ (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
| Cconst_pointer n ->
- (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+ (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
| Cconst_natpointer n ->
- (Ispecific(Istore_int(n, addr)), Ctuple [])
+ (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
| Cconst_symbol s ->
- (Ispecific(Istore_symbol(s, addr)), Ctuple [])
+ (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple [])
| _ ->
- super#select_store addr exp
+ super#select_store is_assign addr exp
method! select_operation op args =
match op with
diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml
index 6b141230f..3a7174763 100644
--- a/asmcomp/mach.ml
+++ b/asmcomp/mach.ml
@@ -46,7 +46,7 @@ type operation =
| Iextcall of string * bool
| Istackoffset of int
| Iload of Cmm.memory_chunk * Arch.addressing_mode
- | Istore of Cmm.memory_chunk * Arch.addressing_mode
+ | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
| Ialloc of int
| Iintop of integer_operation
| Iintop_imm of integer_operation * int
diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli
index 30643730d..618e5e4ce 100644
--- a/asmcomp/mach.mli
+++ b/asmcomp/mach.mli
@@ -43,10 +43,11 @@ type operation =
| Icall_imm of string
| Itailcall_ind
| Itailcall_imm of string
- | Iextcall of string * bool
+ | Iextcall of string * bool (* false = noalloc, true = alloc *)
| Istackoffset of int
| Iload of Cmm.memory_chunk * Arch.addressing_mode
- | Istore of Cmm.memory_chunk * Arch.addressing_mode
+ | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
+ (* false = initialization, true = assignment *)
| Ialloc of int
| Iintop of integer_operation
| Iintop_imm of integer_operation * int
diff --git a/asmcomp/power/CSE.ml b/asmcomp/power/CSE.ml
new file mode 100644
index 000000000..50fefa5e3
--- /dev/null
+++ b/asmcomp/power/CSE.ml
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* CSE for the PowerPC *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+ match op with
+ | Ispecific(Imultaddf | Imultsubf) -> Op_pure
+ | Ispecific(Ialloc_far _) -> Op_other
+ | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+ match op with
+ | Iconst_int n | Iconst_blockheader n -> n <= 32767n && n >= -32768n
+ | _ -> false
+
+end
+
+let fundecl f =
+ (new cse)#fundecl f
+
diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp
index f31d632c1..0a26ed147 100644
--- a/asmcomp/power/emit.mlp
+++ b/asmcomp/power/emit.mlp
@@ -333,7 +333,7 @@ let instr_size = function
if chunk = Byte_signed
then load_store_size addr + 1
else load_store_size addr
- | Lop(Istore(chunk, addr)) -> load_store_size addr
+ | Lop(Istore(chunk, addr, _)) -> load_store_size addr
| Lop(Ialloc n) -> 4
| Lop(Ispecific(Ialloc_far n)) -> 5
| Lop(Iintop Imod) -> 3
@@ -548,7 +548,7 @@ let rec emit_instr i dslot =
emit_load_store loadinstr addr i.arg 0 i.res.(0);
if chunk = Byte_signed then
` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
- | Lop(Istore(chunk, addr)) ->
+ | Lop(Istore(chunk, addr, _)) ->
let storeinstr =
match chunk with
Byte_unsigned | Byte_signed -> "stb"
diff --git a/asmcomp/power/scheduling.ml b/asmcomp/power/scheduling.ml
index 6e594f028..7adaa2eed 100644
--- a/asmcomp/power/scheduling.ml
+++ b/asmcomp/power/scheduling.ml
@@ -44,7 +44,7 @@ method reload_retaddr_latency = 12
method oper_issue_cycles = function
Iconst_float _ | Iconst_symbol _ -> 2
| Iload(_, Ibased(_, _)) -> 2
- | Istore(_, Ibased(_, _)) -> 2
+ | Istore(_, Ibased(_, _), _) -> 2
| Ialloc _ -> 4
| Iintop(Imod) -> 40 (* assuming full stall *)
| Iintop(Icomp _) -> 4
diff --git a/asmcomp/printmach.ml b/asmcomp/printmach.ml
index 0c577890d..a39160d28 100644
--- a/asmcomp/printmach.ml
+++ b/asmcomp/printmach.ml
@@ -119,12 +119,13 @@ let operation op arg ppf res =
| Iload(chunk, addr) ->
fprintf ppf "%s[%a]"
(Printcmm.chunk chunk) (Arch.print_addressing reg addr) arg
- | Istore(chunk, addr) ->
- fprintf ppf "%s[%a] := %a"
+ | Istore(chunk, addr, is_assign) ->
+ fprintf ppf "%s[%a] := %a %s"
(Printcmm.chunk chunk)
(Arch.print_addressing reg addr)
(Array.sub arg 1 (Array.length arg - 1))
reg arg.(0)
+ (if is_assign then "(assign)" else "(init)")
| Ialloc n -> fprintf ppf "alloc %i" n
| Iintop(op) -> fprintf ppf "%a%s%a" reg arg.(0) (intop op) reg arg.(1)
| Iintop_imm(op, n) -> fprintf ppf "%a%s%i" reg arg.(0) (intop op) n
diff --git a/asmcomp/schedgen.ml b/asmcomp/schedgen.ml
index e04eacd37..eb91854a5 100644
--- a/asmcomp/schedgen.ml
+++ b/asmcomp/schedgen.ml
@@ -165,7 +165,7 @@ method private instr_in_basic_block instr =
load or store instructions (e.g. on the I386). *)
method is_store = function
- Istore(_, _) -> true
+ Istore(_, _, _) -> true
| _ -> false
method is_load = function
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
index a8f073e53..e30d6fec3 100644
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@@ -209,8 +209,8 @@ method virtual select_addressing :
(* Default instruction selection for stores (of words) *)
-method select_store addr arg =
- (Istore(Word, addr), arg)
+method select_store is_assign addr arg =
+ (Istore(Word, addr, is_assign), arg)
(* call marking methods, documented in selectgen.mli *)
@@ -256,10 +256,10 @@ method select_operation op args =
| (Cstore chunk, [arg1; arg2]) ->
let (addr, eloc) = self#select_addressing chunk arg1 in
if chunk = Word then begin
- let (op, newarg2) = self#select_store addr arg2 in
+ let (op, newarg2) = self#select_store true addr arg2 in
(op, [newarg2; eloc])
end else begin
- (Istore(chunk, addr), [arg2; eloc])
+ (Istore(chunk, addr, true), [arg2; eloc])
(* Inversion addr/datum in Istore *)
end
| (Calloc, _) -> (Ialloc 0, args)
@@ -677,16 +677,16 @@ method emit_stores env data regs_addr =
ref (Arch.offset_addressing Arch.identity_addressing (-Arch.size_int)) in
List.iter
(fun e ->
- let (op, arg) = self#select_store !a e in
+ let (op, arg) = self#select_store false !a e in
match self#emit_expr env arg with
None -> assert false
| Some regs ->
match op with
- Istore(_, _) ->
+ Istore(_, _, _) ->
for i = 0 to Array.length regs - 1 do
let r = regs.(i) in
let kind = if r.typ = Float then Double_u else Word in
- self#insert (Iop(Istore(kind, !a)))
+ self#insert (Iop(Istore(kind, !a, false)))
(Array.append [|r|] regs_addr) [||];
a := Arch.offset_addressing !a (size_component r.typ)
done
diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli
index 7012c900c..abc6db5eb 100644
--- a/asmcomp/selectgen.mli
+++ b/asmcomp/selectgen.mli
@@ -35,7 +35,8 @@ class virtual selector_generic : object
method select_condition : Cmm.expression -> Mach.test * Cmm.expression
(* Can be overridden to deal with special test instructions *)
method select_store :
- Arch.addressing_mode -> Cmm.expression -> Mach.operation * Cmm.expression
+ bool -> Arch.addressing_mode -> Cmm.expression ->
+ Mach.operation * Cmm.expression
(* Can be overridden to deal with special store constant instructions *)
method regs_for : Cmm.machtype -> Reg.t array
(* Return an array of fresh registers of the given type.
diff --git a/asmcomp/sparc/CSE.ml b/asmcomp/sparc/CSE.ml
new file mode 100644
index 000000000..c38bab8fe
--- /dev/null
+++ b/asmcomp/sparc/CSE.ml
@@ -0,0 +1,31 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* CSE for Sparc *)
+
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic (* as super *)
+
+method! is_cheap_operation op =
+ match op with
+ | Iconst_int n | Iconst_blockheader n -> n <= 4095n && n >= -4096n
+ | _ -> false
+
+end
+
+let fundecl f =
+ (new cse)#fundecl f
+
diff --git a/asmcomp/sparc/emit.mlp b/asmcomp/sparc/emit.mlp
index 2793776ff..877a3d52a 100644
--- a/asmcomp/sparc/emit.mlp
+++ b/asmcomp/sparc/emit.mlp
@@ -375,7 +375,7 @@ let rec emit_instr i dslot =
| _ -> "ld" in
emit_load loadinstr addr i.arg dest
end
- | Lop(Istore(chunk, addr)) ->
+ | Lop(Istore(chunk, addr, _)) ->
let src = i.arg.(0) in
begin match chunk with
Double_u ->
@@ -612,7 +612,7 @@ let is_one_instr i =
| Iconst_int n | Iconst_blockheader n -> is_native_immediate n
| Istackoffset _ -> true
| Iload(_, Iindexed n) -> i.res.(0).typ <> Float && is_immediate n
- | Istore(_, Iindexed n) -> i.arg.(0).typ <> Float && is_immediate n
+ | Istore(_, Iindexed n, _) -> i.arg.(0).typ <> Float && is_immediate n
| Iintop(op) -> is_one_instr_op op
| Iintop_imm(op, _) -> is_one_instr_op op
| Iaddf | Isubf | Imulf | Idivf -> true
diff --git a/driver/main_args.ml b/driver/main_args.ml
index 7d1c40256..f72a08fff 100644
--- a/driver/main_args.ml
+++ b/driver/main_args.ml
@@ -389,6 +389,10 @@ let mk_dcombine f =
"-dcombine", Arg.Unit f, " (undocumented)"
;;
+let mk_dcse f =
+ "-dcse", Arg.Unit f, " (undocumented)"
+;;
+
let mk_dlive f =
"-dlive", Arg.Unit f, " (undocumented)"
;;
@@ -599,6 +603,7 @@ module type Optcomp_options = sig
val _dcmm : unit -> unit
val _dsel : unit -> unit
val _dcombine : unit -> unit
+ val _dcse : unit -> unit
val _dlive : unit -> unit
val _dspill : unit -> unit
val _dsplit : unit -> unit
@@ -651,6 +656,7 @@ module type Opttop_options = sig
val _dcmm : unit -> unit
val _dsel : unit -> unit
val _dcombine : unit -> unit
+ val _dcse : unit -> unit
val _dlive : unit -> unit
val _dspill : unit -> unit
val _dsplit : unit -> unit
@@ -848,6 +854,7 @@ struct
mk_dcmm F._dcmm;
mk_dsel F._dsel;
mk_dcombine F._dcombine;
+ mk_dcse F._dcse;
mk_dlive F._dlive;
mk_dspill F._dspill;
mk_dsplit F._dsplit;
@@ -900,6 +907,7 @@ module Make_opttop_options (F : Opttop_options) = struct
mk_dcmm F._dcmm;
mk_dsel F._dsel;
mk_dcombine F._dcombine;
+ mk_dcse F._dcse;
mk_dlive F._dlive;
mk_dspill F._dspill;
mk_dsplit F._dsplit;
diff --git a/driver/main_args.mli b/driver/main_args.mli
index 6ebf95c9a..7d957d009 100644
--- a/driver/main_args.mli
+++ b/driver/main_args.mli
@@ -177,6 +177,7 @@ module type Optcomp_options = sig
val _dcmm : unit -> unit
val _dsel : unit -> unit
val _dcombine : unit -> unit
+ val _dcse : unit -> unit
val _dlive : unit -> unit
val _dspill : unit -> unit
val _dsplit : unit -> unit
@@ -229,6 +230,7 @@ module type Opttop_options = sig
val _dcmm : unit -> unit
val _dsel : unit -> unit
val _dcombine : unit -> unit
+ val _dcse : unit -> unit
val _dlive : unit -> unit
val _dspill : unit -> unit
val _dsplit : unit -> unit
diff --git a/driver/optmain.ml b/driver/optmain.ml
index 2fb1a22b3..c8060b6a9 100644
--- a/driver/optmain.ml
+++ b/driver/optmain.ml
@@ -135,6 +135,7 @@ module Options = Main_args.Make_optcomp_options (struct
let _dcmm = set dump_cmm
let _dsel = set dump_selection
let _dcombine = set dump_combine
+ let _dcse = set dump_cse
let _dlive () = dump_live := true; Printmach.print_live := true
let _dspill = set dump_spill
let _dsplit = set dump_split
diff --git a/tools/ocamloptp.ml b/tools/ocamloptp.ml
index 59cb661d9..69c288767 100644
--- a/tools/ocamloptp.ml
+++ b/tools/ocamloptp.ml
@@ -106,6 +106,7 @@ module Options = Main_args.Make_optcomp_options (struct
let _dcmm = option "-dcmm"
let _dsel = option "-dsel"
let _dcombine = option "-dcombine"
+ let _dcse = option "-dcse"
let _dlive = option "-dlive"
let _dspill = option "-dspill"
let _dsplit = option "-dsplit"
diff --git a/utils/clflags.ml b/utils/clflags.ml
index d0484b772..90b8a948f 100644
--- a/utils/clflags.ml
+++ b/utils/clflags.ml
@@ -73,6 +73,7 @@ let optimize_for_speed = ref true (* -compact *)
and dump_cmm = ref false (* -dcmm *)
let dump_selection = ref false (* -dsel *)
+let dump_cse = ref false (* -dcse *)
let dump_live = ref false (* -dlive *)
let dump_spill = ref false (* -dspill *)
let dump_split = ref false (* -dsplit *)
diff --git a/utils/clflags.mli b/utils/clflags.mli
index 02c378d23..489ac4d7b 100644
--- a/utils/clflags.mli
+++ b/utils/clflags.mli
@@ -68,6 +68,7 @@ val keep_asm_file : bool ref
val optimize_for_speed : bool ref
val dump_cmm : bool ref
val dump_selection : bool ref
+val dump_cse : bool ref
val dump_live : bool ref
val dump_spill : bool ref
val dump_split : bool ref