diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 2014-04-26 10:40:22 +0000 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 2014-04-26 10:40:22 +0000 |
commit | 558f40e3446854913d5ce011441c4b10da03f27e (patch) | |
tree | 84c78a2d3098937813daae9bd75d328dc0669840 | |
parent | 95d98cd9782c0577b0c7290f6535b29e7bd4cd41 (diff) |
New back-end optimization pass: common subexpression elimination (CSE).
(Reuses results of previous computations instead of recomputing them.)
(Cherry-picked from branch backend-optim.)
Tested on amd64/linux and i386/linux.
Other back-ends compile (after assorted updates) but are untested.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14688 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
40 files changed, 660 insertions, 82 deletions
@@ -576,6 +576,7 @@ bytecomp/typeopt.cmo : typing/types.cmi typing/typedtree.cmi \ bytecomp/typeopt.cmx : typing/types.cmx typing/typedtree.cmx \ typing/predef.cmx typing/path.cmx bytecomp/lambda.cmx typing/ident.cmx \ typing/env.cmx typing/ctype.cmx bytecomp/typeopt.cmi +asmcomp/CSEgen.cmi : asmcomp/mach.cmi asmcomp/asmgen.cmi : bytecomp/lambda.cmi asmcomp/cmm.cmi asmcomp/asmlibrarian.cmi : asmcomp/asmlink.cmi : asmcomp/cmx_format.cmi @@ -618,6 +619,10 @@ asmcomp/selection.cmi : asmcomp/mach.cmi asmcomp/cmm.cmi asmcomp/spill.cmi : asmcomp/mach.cmi asmcomp/split.cmi : asmcomp/mach.cmi asmcomp/strmatch.cmi : asmcomp/cmm.cmi +asmcomp/CSE.cmo : asmcomp/mach.cmi asmcomp/CSEgen.cmi asmcomp/arch.cmo +asmcomp/CSE.cmx : asmcomp/mach.cmx asmcomp/CSEgen.cmx asmcomp/arch.cmx +asmcomp/CSEgen.cmo : asmcomp/reg.cmi asmcomp/mach.cmi asmcomp/CSEgen.cmi +asmcomp/CSEgen.cmx : asmcomp/reg.cmx asmcomp/mach.cmx asmcomp/CSEgen.cmi asmcomp/arch.cmo : asmcomp/arch.cmx : asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \ @@ -629,7 +634,7 @@ asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \ asmcomp/emitaux.cmi asmcomp/emit.cmi asmcomp/deadcode.cmi \ utils/config.cmi asmcomp/compilenv.cmi asmcomp/comballoc.cmi \ asmcomp/coloring.cmi asmcomp/cmmgen.cmi asmcomp/cmm.cmi \ - asmcomp/closure.cmi utils/clflags.cmi asmcomp/asmgen.cmi + asmcomp/closure.cmi utils/clflags.cmi asmcomp/CSE.cmo asmcomp/asmgen.cmi asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \ asmcomp/spill.cmx asmcomp/selection.cmx asmcomp/scheduling.cmx \ asmcomp/reload.cmx asmcomp/reg.cmx asmcomp/proc.cmx asmcomp/printmach.cmx \ @@ -639,7 +644,7 @@ asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \ asmcomp/emitaux.cmx asmcomp/emit.cmx asmcomp/deadcode.cmx \ utils/config.cmx asmcomp/compilenv.cmx asmcomp/comballoc.cmx \ asmcomp/coloring.cmx asmcomp/cmmgen.cmx asmcomp/cmm.cmx \ - asmcomp/closure.cmx utils/clflags.cmx asmcomp/asmgen.cmi + asmcomp/closure.cmx utils/clflags.cmx asmcomp/CSE.cmx asmcomp/asmgen.cmi asmcomp/asmlibrarian.cmo : utils/misc.cmi parsing/location.cmi \ utils/config.cmi asmcomp/compilenv.cmi asmcomp/cmx_format.cmi \ utils/clflags.cmi asmcomp/clambda.cmi utils/ccomp.cmi asmcomp/asmlink.cmi \ @@ -39,6 +39,8 @@ Compilers: int32/int64/nativeint arithmetic. Constant propagation for floats can be turned off with option -no-float-const-prop, for codes that change FP rounding modes at run-time. +- New back-end optimization pass: common subexpression elimination (CSE). + (Reuses results of previous computations instead of recomputing them.) - New back-end optimization pass: dead code elimination. (Removes arithmetic and load instructions whose results are unused.) - PR#6269 Optimization of string matching (patch by Benoit Vaugon @@ -84,7 +84,9 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \ asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \ asmcomp/closure.cmo asmcomp/strmatch.cmo asmcomp/cmmgen.cmo \ asmcomp/printmach.cmo asmcomp/selectgen.cmo asmcomp/selection.cmo \ - asmcomp/comballoc.cmo asmcomp/liveness.cmo \ + asmcomp/comballoc.cmo \ + asmcomp/CSEgen.cmo asmcomp/CSE.cmo \ + asmcomp/liveness.cmo \ asmcomp/spill.cmo asmcomp/split.cmo \ asmcomp/interf.cmo asmcomp/coloring.cmo \ asmcomp/reloadgen.cmo asmcomp/reload.cmo \ @@ -589,6 +591,14 @@ partialclean:: beforedepend:: asmcomp/selection.ml +asmcomp/CSE.ml: asmcomp/$(ARCH)/CSE.ml + ln -s $(ARCH)/CSE.ml asmcomp/CSE.ml + +partialclean:: + rm -f asmcomp/CSE.ml + +beforedepend:: asmcomp/CSE.ml + asmcomp/reload.ml: asmcomp/$(ARCH)/reload.ml ln -s $(ARCH)/reload.ml asmcomp/reload.ml diff --git a/asmcomp/CSEgen.ml b/asmcomp/CSEgen.ml new file mode 100644 index 000000000..1cbef266b --- /dev/null +++ b/asmcomp/CSEgen.ml @@ -0,0 +1,258 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Common subexpression elimination by value numbering over extended + basic blocks. *) + +open Mach + +type valnum = int + +(* We maintain sets of equations of the form + valnums = operation(valnums) + plus a mapping from registers to value numbers. *) + +type rhs = operation * valnum array + +module Equations = + Map.Make(struct type t = rhs let compare = Pervasives.compare end) + +type numbering = + { num_next: int; (* next fresh value number *) + num_eqs: valnum array Equations.t; (* mapping rhs -> valnums *) + num_reg: valnum Reg.Map.t } (* mapping register -> valnum *) + +let empty_numbering = + { num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty } + +(** [valnum_reg n r] returns the value number for the contents of + register [r]. If none exists, a fresh value number is returned + and associated with register [r]. The possibly updated numbering + is also returned. [valnum_regs] is similar, but for an array of + registers. *) + +let valnum_reg n r = + try + (n, Reg.Map.find r n.num_reg) + with Not_found -> + let v = n.num_next in + ({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v) + +let valnum_regs n rs = + let l = Array.length rs in + let vs = Array.make l 0 in + let n = ref n in + for i = 0 to l-1 do + let (ni, vi) = valnum_reg !n rs.(i) in + vs.(i) <- vi; + n := ni + done; + (!n, vs) + +(* Look up the set of equations for an equation with the given rhs. + Return [Some res] if there is one, where [res] is the lhs. *) + +let find_equation n rhs = + try + Some(Equations.find rhs n.num_eqs) + with Not_found -> + None + +(* Find a set of registers containing the given value numbers. *) + +let find_regs_containing n vs = + match Array.length vs with + | 0 -> Some [||] + | 1 -> let v = vs.(0) in + Reg.Map.fold (fun r v' res -> if v' = v then Some [|r|] else res) + n.num_reg None + | _ -> assert false + +(* Associate the given value numbers to the given result registers, + without adding new equations. *) + +let set_known_regs n rs vs = + match Array.length rs with + | 0 -> n + | 1 -> { n with num_reg = Reg.Map.add rs.(0) vs.(0) n.num_reg } + | _ -> assert false + +(* Record the effect of a move: no new equations, but the result reg + maps to the same value number as the argument reg. *) + +let set_move n src dst = + let (n1, v) = valnum_reg n src in + { n1 with num_reg = Reg.Map.add dst v n1.num_reg } + +(* Record the equation [fresh valnums = rhs] and associate the given + result registers [rs] to [fresh valnums]. *) + +let set_fresh_regs n rs rhs = + match Array.length rs with + | 0 -> { n with num_eqs = Equations.add rhs [||] n.num_eqs } + | 1 -> let v = n.num_next in + { num_next = v + 1; + num_eqs = Equations.add rhs [|v|] n.num_eqs; + num_reg = Reg.Map.add rs.(0) v n.num_reg } + | _ -> assert false + +(* Forget everything we know about the given result registers, + which are receiving unpredictable values at run-time. *) + +let set_unknown_regs n rs = + { n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg } + +(* Keep only the equations satisfying the given predicate. *) + +let filter_equations pred n = + { n with num_eqs = Equations.filter (fun (op,_) res -> pred op) n.num_eqs } + +(* Prepend a reg-reg move *) + +let insert_move srcs dsts i = + match Array.length srcs with + | 0 -> i + | 1 -> instr_cons (Iop Imove) srcs dsts i + | _ -> assert false + +(* Classification of operations *) + +type op_class = + | Op_pure (* pure, produce one result *) + | Op_checkbound (* checkbound-style: no result, can raise an exn *) + | Op_load (* memory load *) + | Op_store of bool (* memory store, false = init, true = assign *) + | Op_other (* anything else that does not store in memory *) + +class cse_generic = object (self) + +(* Default classification of operations. Can be overriden in + processor-specific files to classify specific operations better. *) + +method class_of_operation op = + match op with + | Imove | Ispill | Ireload -> assert false (* treated specially *) + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_blockheader _ -> Op_pure + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ -> assert false (* treated specially *) + | Istackoffset _ -> Op_other + | Iload(_,_) -> Op_load + | Istore(_,_,asg) -> Op_store asg + | Ialloc _ -> Op_other + | Iintop(Icheckbound) -> Op_checkbound + | Iintop _ -> Op_pure + | Iintop_imm(Icheckbound, _) -> Op_checkbound + | Iintop_imm(_, _) -> Op_pure + | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf + | Ifloatofint | Iintoffloat -> Op_pure + | Ispecific _ -> Op_other + +(* Operations that are so cheap that it isn't worth factoring them. *) + +method is_cheap_operation op = + match op with + | Iconst_int _ | Iconst_blockheader _ -> true + | _ -> false + +(* Forget all equations involving memory loads. Performed after a + non-initializing store *) + +method private kill_loads n = + filter_equations (fun o -> self#class_of_operation o <> Op_load) n + +(* Keep only equations involving checkbounds, and forget register values. + Performed across a call. *) + +method private keep_checkbounds n = + filter_equations (fun o -> self#class_of_operation o = Op_checkbound) + {n with num_reg = Reg.Map.empty } + +(* Perform CSE on the given instruction [i] and its successors. + [n] is the value numbering current at the beginning of [i]. *) + +method private cse n i = + match i.desc with + | Iend | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _) + | Iexit _ | Iraise _ -> + i + | Iop (Imove | Ispill | Ireload) -> + (* For moves, we associate the same value number to the result reg + as to the argument reg. *) + let n1 = set_move n i.arg.(0) i.res.(0) in + {i with next = self#cse n1 i.next} + | Iop (Icall_ind | Icall_imm _ | Iextcall _) -> + (* We don't perform CSE across function calls, as it increases + register pressure too much. We do remember the checkbound + instructions already performed, though, since their reuse + cannot increase register pressure. *) + let n1 = self#keep_checkbounds n in + {i with next = self#cse n1 i.next} + | Iop op -> + begin match self#class_of_operation op with + | Op_pure | Op_checkbound | Op_load -> + assert (Array.length i.res <= 1); + let (n1, varg) = valnum_regs n i.arg in + begin match find_equation n1 (op, varg) with + | Some vres -> + (* This operation was computed earlier. *) + let n2 = set_known_regs n1 i.res vres in + begin match find_regs_containing n1 vres with + | Some res when not (self#is_cheap_operation op) -> + (* We can replace res <- op args with r <- move res. + If the operation is very cheap to compute, e.g. + an integer constant, don't bother. *) + insert_move res i.res (self#cse n2 i.next) + | _ -> + {i with next = self#cse n2 i.next} + end + | None -> + (* This operation produces a result we haven't seen earlier. *) + let n2 = set_fresh_regs n1 i.res (op, varg) in + {i with next = self#cse n2 i.next} + end + | Op_store false | Op_other -> + (* An initializing store or an "other" operation do not invalidate + any equations, but we do not know anything about the results. *) + let n1 = set_unknown_regs n i.res in + {i with next = self#cse n1 i.next} + | Op_store true -> + (* A non-initializing store: it can invalidate + anything we know about prior loads. *) + let n1 = set_unknown_regs (self#kill_loads n) i.res in + {i with next = self#cse n1 i.next} + end + (* For control structures, we set the numbering to empty at every + join point, but propagate the current numbering across fork points. *) + | Iifthenelse(test, ifso, ifnot) -> + {i with desc = Iifthenelse(test, self#cse n ifso, self#cse n ifnot); + next = self#cse empty_numbering i.next} + | Iswitch(index, cases) -> + {i with desc = Iswitch(index, Array.map (self#cse n) cases); + next = self#cse empty_numbering i.next} + | Iloop(body) -> + {i with desc = Iloop(self#cse empty_numbering body); + next = self#cse empty_numbering i.next} + | Icatch(nfail, body, handler) -> + {i with desc = Icatch(nfail, self#cse n body, self#cse empty_numbering handler); + next = self#cse empty_numbering i.next} + | Itrywith(body, handler) -> + {i with desc = Itrywith(self#cse n body, self#cse empty_numbering handler); + next = self#cse empty_numbering i.next} + +method fundecl f = + {f with fun_body = self#cse empty_numbering f.fun_body} + +end + + + diff --git a/asmcomp/CSEgen.mli b/asmcomp/CSEgen.mli new file mode 100644 index 000000000..c19855eca --- /dev/null +++ b/asmcomp/CSEgen.mli @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Common subexpression elimination by value numbering over extended + basic blocks. *) + +type op_class = + | Op_pure (* pure, produce one result *) + | Op_checkbound (* checkbound-style: no result, can raise an exn *) + | Op_load (* memory load *) + | Op_store of bool (* memory store, false = init, true = assign *) + | Op_other (* anything else that does not store in memory *) + +class cse_generic : object + (* The following methods can be overriden to handle processor-specific + operations. *) + + method class_of_operation: Mach.operation -> op_class + + method is_cheap_operation: Mach.operation -> bool + (* Operations that are so cheap that it isn't worth factoring them. *) + + (* The following method is the entry point and should not be overridden *) + method fundecl: Mach.fundecl -> Mach.fundecl + +end + + + diff --git a/asmcomp/amd64/CSE.ml b/asmcomp/amd64/CSE.ml new file mode 100644 index 000000000..63ef08853 --- /dev/null +++ b/asmcomp/amd64/CSE.ml @@ -0,0 +1,36 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for the AMD64 *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Ilea _) -> Op_pure + | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Ioffset_loc(_, _)) -> Op_store true + | Ispecific(Ifloatarithmem _) -> Op_load + | _ -> super#class_of_operation op + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml index b0a5ffb8b..3741dd74b 100644 --- a/asmcomp/amd64/arch.ml +++ b/asmcomp/amd64/arch.ml @@ -33,8 +33,8 @@ type addressing_mode = type specific_operation = Ilea of addressing_mode (* "lea" gives scaled adds *) - | Istore_int of nativeint * addressing_mode (* Store an integer constant *) - | Istore_symbol of string * addressing_mode (* Store a symbol *) + | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *) + | Istore_symbol of string * addressing_mode * bool (* Store a symbol *) | Ioffset_loc of int * addressing_mode (* Add a constant to a location *) | Ifloatarithmem of float_operation * addressing_mode (* Float arith operation with memory *) @@ -101,10 +101,14 @@ let print_addressing printreg addr ppf arg = let print_specific_operation printreg op ppf arg = match op with | Ilea addr -> print_addressing printreg addr ppf arg - | Istore_int(n, addr) -> - fprintf ppf "[%a] := %nd" (print_addressing printreg addr) arg n - | Istore_symbol(lbl, addr) -> - fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl + | Istore_int(n, addr, is_assign) -> + fprintf ppf "[%a] := %nd %s" + (print_addressing printreg addr) arg n + (if is_assign then "(assign)" else "(init)") + | Istore_symbol(lbl, addr, is_assign) -> + fprintf ppf "[%a] := \"%s\" %s" + (print_addressing printreg addr) arg lbl + (if is_assign then "(assign)" else "(init)") | Ioffset_loc(n, addr) -> fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n | Isqrtf -> diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index bea7e9331..b576ece98 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -449,7 +449,7 @@ let emit_instr fallthrough i = | Double | Double_u -> ` movsd {emit_addressing addr i.arg 0}, {emit_reg dest}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word -> ` movq {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n` @@ -542,9 +542,9 @@ let emit_instr fallthrough i = ` cvttsd2siq {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` | Lop(Ispecific(Ilea addr)) -> ` leaq {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` movq ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> assert (not !pic_code && not !Clflags.dlcode); ` movq ${emit_symbol s}, {emit_addressing addr i.arg 0}\n` | Lop(Ispecific(Ioffset_loc(n, addr))) -> diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp index cc158d889..a66f0c93b 100644 --- a/asmcomp/amd64/emit_nt.mlp +++ b/asmcomp/amd64/emit_nt.mlp @@ -443,7 +443,7 @@ let emit_instr fallthrough i = | Double | Double_u -> ` movsd {emit_reg dest}, REAL8 PTR {emit_addressing addr i.arg 0}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word -> ` mov QWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n` @@ -532,9 +532,9 @@ let emit_instr fallthrough i = ` cvttsd2si {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` | Lop(Ispecific(Ilea addr)) -> ` lea {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` mov QWORD PTR {emit_addressing addr i.arg 0}, {emit_nativeint n}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> assert (not !pic_code); add_used_symbol s; ` mov QWORD PTR {emit_addressing addr i.arg 0}, OFFSET {emit_symbol s}\n` diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml index 5d448df36..cd06559e1 100644 --- a/asmcomp/amd64/proc.ml +++ b/asmcomp/amd64/proc.ml @@ -259,7 +259,7 @@ let destroyed_at_oper = function | Iop(Iextcall(_, false)) -> destroyed_at_c_call | Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _)) -> [| rax; rdx |] - | Iop(Istore(Single, _)) -> [| rxmm15 |] + | Iop(Istore(Single, _, _)) -> [| rxmm15 |] | Iop(Ialloc _ | Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _)) -> [| rax |] | Iswitch(_, _) -> [| rax; rdx |] @@ -290,7 +290,7 @@ let max_register_pressure = function if fp then [| 10; 16 |] else [| 11; 16 |] | Ialloc _ | Iintop(Icomp _) | Iintop_imm((Icomp _), _) -> if fp then [| 11; 16 |] else [| 12; 16 |] - | Istore(Single, _) -> + | Istore(Single, _, _) -> if fp then [| 12; 15 |] else [| 13; 15 |] | _ -> if fp then [| 12; 16 |] else [| 13; 16 |] diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml index 5e6afbcab..fa7fe66c0 100644 --- a/asmcomp/amd64/selection.ml +++ b/asmcomp/amd64/selection.ml @@ -152,20 +152,20 @@ method select_addressing chunk exp = | Ascaledadd(e1, e2, scale) -> (Iindexed2scaled(scale, d), Ctuple[e1; e2]) -method! select_store addr exp = +method! select_store is_assign addr exp = match exp with Cconst_int n when self#is_immediate n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | (Cconst_natint n | Cconst_blockheader n) when self#is_immediate_natint n -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_pointer n when self#is_immediate n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | Cconst_natpointer n when self#is_immediate_natint n -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_symbol s when not (!pic_code || !Clflags.dlcode) -> - (Ispecific(Istore_symbol(s, addr)), Ctuple []) + (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple []) | _ -> - super#select_store addr exp + super#select_store is_assign addr exp method! select_operation op args = match op with diff --git a/asmcomp/arm/CSE.ml b/asmcomp/arm/CSE.ml new file mode 100644 index 000000000..00282f1f5 --- /dev/null +++ b/asmcomp/arm/CSE.ml @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for ARM *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Ishiftcheckbound _) -> Op_checkbound + | Ispecific _ -> Op_pure + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 255n && n >= 0n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp index 18c873de2..61035b85f 100644 --- a/asmcomp/arm/emit.mlp +++ b/asmcomp/arm/emit.mlp @@ -283,7 +283,7 @@ let num_literals = ref 0 (* Label a floating-point literal *) let float_literal f = - let repr = Int64.bits_of_float cst in + let repr = Int64.bits_of_float f in try List.assoc repr !float_literals with Not_found -> @@ -391,8 +391,7 @@ let emit_instr i = | Lop(Iconst_int n | Iconst_blockheader n) -> emit_intconst i.res.(0) (Nativeint.to_int32 n) | Lop(Iconst_float f) when !fpu = Soft -> - ` @ {emit_string f}\n`; - let bits = Int64.bits_of_float (float_of_string f) in + let bits = Int64.bits_of_float f in let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32) and low_bits = Int64.to_int32 bits in if is_immediate low_bits || is_immediate high_bits then begin @@ -407,7 +406,7 @@ let emit_instr i = end | Lop(Iconst_float f) when !fpu = VFPv2 -> let lbl = float_literal f in - ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`; + ` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`; 1 | Lop(Iconst_float f) -> let encode imm = @@ -426,12 +425,12 @@ let emit_instr i = let ex = ((ex + 3) land 0x07) lxor 0x04 in Some((sg lsl 7) lor (ex lsl 4) lor mn) end in - begin match encode (Int64.bits_of_float (float_of_string f)) with + begin match encode (Int64.bits_of_float f) with None -> let lbl = float_literal f in - ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n` + ` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n` | Some imm8 -> - ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n` + ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8}\n` end; 1 | Lop(Iconst_symbol s) -> emit_load_symbol_addr i.res.(0) s @@ -509,10 +508,10 @@ let emit_instr i = | Double_u -> "fldd" | _ (* 32-bit quantities *) -> "ldr" in ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1 - | Lop(Istore(Single, addr)) when !fpu >= VFPv2 -> + | Lop(Istore(Single, addr, _)) when !fpu >= VFPv2 -> ` fcvtsd s14, {emit_reg i.arg.(0)}\n`; ` fsts s14, {emit_addressing addr i.arg 1}\n`; 2 - | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft -> + | Lop(Istore((Double | Double_u), addr, _)) when !fpu = Soft -> (* Use STM or STRD if possible *) begin match i.arg.(0), i.arg.(1), addr with {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0 @@ -526,7 +525,7 @@ let emit_instr i = ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`; ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2 end - | Lop(Istore(size, addr)) -> + | Lop(Istore(size, addr, _)) -> let r = i.arg.(0) in let instr = match size with diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml index c49e3f087..a5bf3d5c8 100644 --- a/asmcomp/arm/proc.ml +++ b/asmcomp/arm/proc.ml @@ -203,7 +203,7 @@ let destroyed_at_oper = function [| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *) | Iop(Iintop Imulh) when !arch < ARMv6 -> [| phys_reg 8 |] (* r12 destroyed *) - | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) -> + | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) -> [| phys_reg 107 |] (* d7 (s14-s15) destroyed *) | _ -> [||] @@ -222,7 +222,7 @@ let max_register_pressure = function | Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |] | Iconst_symbol _ when !pic_code -> [| 7; 16; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _) -> [| 9; 15; 31 |] + | Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |] | _ -> [| 9; 16; 32 |] (* Pure operations (without any side effect besides updating their result @@ -231,7 +231,8 @@ let max_register_pressure = function let op_is_pure = function | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) + | Ispecific(Ishiftcheckbound _) -> false | _ -> true (* Layout of the stack *) diff --git a/asmcomp/arm64/CSE.ml b/asmcomp/arm64/CSE.ml new file mode 100644 index 000000000..359e57eb5 --- /dev/null +++ b/asmcomp/arm64/CSE.ml @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for ARM64 *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Ishiftcheckbound _) -> Op_checkbound + | Ispecific _ -> Op_pure + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 65535n && n >= 0n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp index eaab3ef48..2c2454fde 100644 --- a/asmcomp/arm64/emit.mlp +++ b/asmcomp/arm64/emit.mlp @@ -328,9 +328,9 @@ let emit_instr i = | Lop(Iconst_float f) -> let b = Int64.bits_of_float f in if b = 0L then - ` fmov {emit_reg i.res.(0)}, xzr /* {emit_string f} */\n` + ` fmov {emit_reg i.res.(0)}, xzr\n` else if is_immediate_float b then - ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b} /* {emit_string f} */\n` + ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b}\n` else begin let lbl = float_literal b in ` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`; @@ -388,7 +388,7 @@ let emit_instr i = | Word | Double | Double_u -> ` ldr {emit_reg dst}, {emit_addressing addr base}\n` end - | Lop(Istore(size, addr)) -> + | Lop(Istore(size, addr, _)) -> let src = i.arg.(0) in let base = match addr with diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml index b52c2fd8a..d2cda5c23 100644 --- a/asmcomp/arm64/proc.ml +++ b/asmcomp/arm64/proc.ml @@ -177,7 +177,7 @@ let destroyed_at_oper = function destroyed_at_c_call | Iop(Ialloc _) -> [| reg_x15 |] - | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) -> + | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) -> [| reg_d7 |] (* d7 / s7 destroyed *) | _ -> [||] @@ -194,9 +194,19 @@ let max_register_pressure = function | Iextcall(_, _) -> [| 10; 8 |] | Ialloc _ -> [| 25; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |] + | Iload(Single, _) | Istore(Single, _, _) -> [| 26; 31 |] | _ -> [| 26; 32 |] +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) + | Ispecific(Ishiftcheckbound _) -> false + | _ -> true + (* Layout of the stack *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml index ab71d51a0..311bb029b 100644 --- a/asmcomp/asmgen.ml +++ b/asmcomp/asmgen.ml @@ -64,6 +64,8 @@ let compile_fundecl (ppf : formatter) fd_cmm = ++ pass_dump_if ppf dump_selection "After instruction selection" ++ Comballoc.fundecl ++ pass_dump_if ppf dump_combine "After allocation combining" + ++ CSE.fundecl + ++ pass_dump_if ppf dump_cse "After CSE" ++ liveness ppf ++ Deadcode.fundecl ++ pass_dump_if ppf dump_live "Liveness analysis" diff --git a/asmcomp/i386/CSE.ml b/asmcomp/i386/CSE.ml new file mode 100644 index 000000000..3ce456702 --- /dev/null +++ b/asmcomp/i386/CSE.ml @@ -0,0 +1,48 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for the i386 *) + +open Cmm +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + (* Operations that affect the floating-point stack cannot be factored *) + | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf + | Iintoffloat | Ifloatofint + | Iload((Single | Double | Double_u), _) -> Op_other + (* Specific ops *) + | Ispecific(Ilea _) -> Op_pure + | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Ioffset_loc(_, _)) -> Op_store true + | Ispecific _ -> Op_other + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int _ | Iconst_blockheader _ -> true + | Iconst_symbol _ -> true + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml index d2f9fd61a..0d2130445 100644 --- a/asmcomp/i386/arch.ml +++ b/asmcomp/i386/arch.ml @@ -31,8 +31,8 @@ type addressing_mode = type specific_operation = Ilea of addressing_mode (* Lea gives scaled adds *) - | Istore_int of nativeint * addressing_mode (* Store an integer constant *) - | Istore_symbol of string * addressing_mode (* Store a symbol *) + | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *) + | Istore_symbol of string * addressing_mode * bool (* Store a symbol *) | Ioffset_loc of int * addressing_mode (* Add a constant to a location *) | Ipush (* Push regs on stack *) | Ipush_int of nativeint (* Push an integer constant *) @@ -105,11 +105,14 @@ let print_addressing printreg addr ppf arg = let print_specific_operation printreg op ppf arg = match op with | Ilea addr -> print_addressing printreg addr ppf arg - | Istore_int(n, addr) -> - fprintf ppf "[%a] := %s" (print_addressing printreg addr) arg - (Nativeint.to_string n) - | Istore_symbol(lbl, addr) -> - fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl + | Istore_int(n, addr, is_assign) -> + fprintf ppf "[%a] := %nd %s" + (print_addressing printreg addr) arg n + (if is_assign then "(assign)" else "(init)") + | Istore_symbol(lbl, addr, is_assign) -> + fprintf ppf "[%a] := \"%s\" %s" + (print_addressing printreg addr) arg lbl + (if is_assign then "(assign)" else "(init)") | Ioffset_loc(n, addr) -> fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n | Ipush -> diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp index 532c43dfe..98df5f958 100644 --- a/asmcomp/i386/emit.mlp +++ b/asmcomp/i386/emit.mlp @@ -544,7 +544,7 @@ let emit_instr fallthrough i = | Double | Double_u -> ` fldl {emit_addressing addr i.arg 0}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word | Thirtytwo_signed | Thirtytwo_unsigned -> ` movl {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n` @@ -684,9 +684,9 @@ let emit_instr fallthrough i = stack_offset := !stack_offset + 8 | Lop(Ispecific(Ilea addr)) -> ` lea {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` movl ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> ` movl ${emit_symbol s}, {emit_addressing addr i.arg 0}\n` | Lop(Ispecific(Ioffset_loc(n, addr))) -> ` addl ${emit_int n}, {emit_addressing addr i.arg 0}\n` diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp index 5d030ec07..a9c9db3e4 100644 --- a/asmcomp/i386/emit_nt.mlp +++ b/asmcomp/i386/emit_nt.mlp @@ -480,7 +480,7 @@ let emit_instr i = | Double | Double_u -> ` fld REAL8 PTR {emit_addressing addr i.arg 0}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word | Thirtytwo_signed | Thirtytwo_unsigned -> ` mov DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n` @@ -618,9 +618,9 @@ let emit_instr i = stack_offset := !stack_offset + 8 | Lop(Ispecific(Ilea addr)) -> ` lea {emit_reg i.res.(0)}, DWORD PTR {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` mov DWORD PTR {emit_addressing addr i.arg 0},{emit_nativeint n}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> add_used_symbol s ; ` mov DWORD PTR {emit_addressing addr i.arg 0},OFFSET {emit_symbol s}\n` | Lop(Ispecific(Ioffset_loc(n, addr))) -> diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml index d86f1b282..10d2d40e3 100644 --- a/asmcomp/i386/selection.ml +++ b/asmcomp/i386/selection.ml @@ -135,7 +135,7 @@ let pseudoregs_for_operation op arg res = (* For storing a byte, the argument must be in eax...edx. (But for a short, any reg will do!) Keep it simple, just force the argument to be in edx. *) - | Istore((Byte_unsigned | Byte_signed), addr) -> + | Istore((Byte_unsigned | Byte_signed), addr, _) -> let newarg = Array.copy arg in newarg.(0) <- edx; (newarg, res, false) @@ -178,20 +178,20 @@ method select_addressing chunk exp = | (Ascaledadd(e1, e2, scale), d) -> (Iindexed2scaled(scale, d), Ctuple[e1; e2]) -method! select_store addr exp = +method! select_store is_assign addr exp = match exp with Cconst_int n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | (Cconst_natint n | Cconst_blockheader n) -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_pointer n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | Cconst_natpointer n -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_symbol s -> - (Ispecific(Istore_symbol(s, addr)), Ctuple []) + (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple []) | _ -> - super#select_store addr exp + super#select_store is_assign addr exp method! select_operation op args = match op with diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml index 6b141230f..3a7174763 100644 --- a/asmcomp/mach.ml +++ b/asmcomp/mach.ml @@ -46,7 +46,7 @@ type operation = | Iextcall of string * bool | Istackoffset of int | Iload of Cmm.memory_chunk * Arch.addressing_mode - | Istore of Cmm.memory_chunk * Arch.addressing_mode + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool | Ialloc of int | Iintop of integer_operation | Iintop_imm of integer_operation * int diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli index 30643730d..618e5e4ce 100644 --- a/asmcomp/mach.mli +++ b/asmcomp/mach.mli @@ -43,10 +43,11 @@ type operation = | Icall_imm of string | Itailcall_ind | Itailcall_imm of string - | Iextcall of string * bool + | Iextcall of string * bool (* false = noalloc, true = alloc *) | Istackoffset of int | Iload of Cmm.memory_chunk * Arch.addressing_mode - | Istore of Cmm.memory_chunk * Arch.addressing_mode + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool + (* false = initialization, true = assignment *) | Ialloc of int | Iintop of integer_operation | Iintop_imm of integer_operation * int diff --git a/asmcomp/power/CSE.ml b/asmcomp/power/CSE.ml new file mode 100644 index 000000000..50fefa5e3 --- /dev/null +++ b/asmcomp/power/CSE.ml @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for the PowerPC *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Imultaddf | Imultsubf) -> Op_pure + | Ispecific(Ialloc_far _) -> Op_other + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 32767n && n >= -32768n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp index f31d632c1..0a26ed147 100644 --- a/asmcomp/power/emit.mlp +++ b/asmcomp/power/emit.mlp @@ -333,7 +333,7 @@ let instr_size = function if chunk = Byte_signed then load_store_size addr + 1 else load_store_size addr - | Lop(Istore(chunk, addr)) -> load_store_size addr + | Lop(Istore(chunk, addr, _)) -> load_store_size addr | Lop(Ialloc n) -> 4 | Lop(Ispecific(Ialloc_far n)) -> 5 | Lop(Iintop Imod) -> 3 @@ -548,7 +548,7 @@ let rec emit_instr i dslot = emit_load_store loadinstr addr i.arg 0 i.res.(0); if chunk = Byte_signed then ` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> let storeinstr = match chunk with Byte_unsigned | Byte_signed -> "stb" diff --git a/asmcomp/power/scheduling.ml b/asmcomp/power/scheduling.ml index 6e594f028..7adaa2eed 100644 --- a/asmcomp/power/scheduling.ml +++ b/asmcomp/power/scheduling.ml @@ -44,7 +44,7 @@ method reload_retaddr_latency = 12 method oper_issue_cycles = function Iconst_float _ | Iconst_symbol _ -> 2 | Iload(_, Ibased(_, _)) -> 2 - | Istore(_, Ibased(_, _)) -> 2 + | Istore(_, Ibased(_, _), _) -> 2 | Ialloc _ -> 4 | Iintop(Imod) -> 40 (* assuming full stall *) | Iintop(Icomp _) -> 4 diff --git a/asmcomp/printmach.ml b/asmcomp/printmach.ml index 0c577890d..a39160d28 100644 --- a/asmcomp/printmach.ml +++ b/asmcomp/printmach.ml @@ -119,12 +119,13 @@ let operation op arg ppf res = | Iload(chunk, addr) -> fprintf ppf "%s[%a]" (Printcmm.chunk chunk) (Arch.print_addressing reg addr) arg - | Istore(chunk, addr) -> - fprintf ppf "%s[%a] := %a" + | Istore(chunk, addr, is_assign) -> + fprintf ppf "%s[%a] := %a %s" (Printcmm.chunk chunk) (Arch.print_addressing reg addr) (Array.sub arg 1 (Array.length arg - 1)) reg arg.(0) + (if is_assign then "(assign)" else "(init)") | Ialloc n -> fprintf ppf "alloc %i" n | Iintop(op) -> fprintf ppf "%a%s%a" reg arg.(0) (intop op) reg arg.(1) | Iintop_imm(op, n) -> fprintf ppf "%a%s%i" reg arg.(0) (intop op) n diff --git a/asmcomp/schedgen.ml b/asmcomp/schedgen.ml index e04eacd37..eb91854a5 100644 --- a/asmcomp/schedgen.ml +++ b/asmcomp/schedgen.ml @@ -165,7 +165,7 @@ method private instr_in_basic_block instr = load or store instructions (e.g. on the I386). *) method is_store = function - Istore(_, _) -> true + Istore(_, _, _) -> true | _ -> false method is_load = function diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml index a8f073e53..e30d6fec3 100644 --- a/asmcomp/selectgen.ml +++ b/asmcomp/selectgen.ml @@ -209,8 +209,8 @@ method virtual select_addressing : (* Default instruction selection for stores (of words) *) -method select_store addr arg = - (Istore(Word, addr), arg) +method select_store is_assign addr arg = + (Istore(Word, addr, is_assign), arg) (* call marking methods, documented in selectgen.mli *) @@ -256,10 +256,10 @@ method select_operation op args = | (Cstore chunk, [arg1; arg2]) -> let (addr, eloc) = self#select_addressing chunk arg1 in if chunk = Word then begin - let (op, newarg2) = self#select_store addr arg2 in + let (op, newarg2) = self#select_store true addr arg2 in (op, [newarg2; eloc]) end else begin - (Istore(chunk, addr), [arg2; eloc]) + (Istore(chunk, addr, true), [arg2; eloc]) (* Inversion addr/datum in Istore *) end | (Calloc, _) -> (Ialloc 0, args) @@ -677,16 +677,16 @@ method emit_stores env data regs_addr = ref (Arch.offset_addressing Arch.identity_addressing (-Arch.size_int)) in List.iter (fun e -> - let (op, arg) = self#select_store !a e in + let (op, arg) = self#select_store false !a e in match self#emit_expr env arg with None -> assert false | Some regs -> match op with - Istore(_, _) -> + Istore(_, _, _) -> for i = 0 to Array.length regs - 1 do let r = regs.(i) in let kind = if r.typ = Float then Double_u else Word in - self#insert (Iop(Istore(kind, !a))) + self#insert (Iop(Istore(kind, !a, false))) (Array.append [|r|] regs_addr) [||]; a := Arch.offset_addressing !a (size_component r.typ) done diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli index 7012c900c..abc6db5eb 100644 --- a/asmcomp/selectgen.mli +++ b/asmcomp/selectgen.mli @@ -35,7 +35,8 @@ class virtual selector_generic : object method select_condition : Cmm.expression -> Mach.test * Cmm.expression (* Can be overridden to deal with special test instructions *) method select_store : - Arch.addressing_mode -> Cmm.expression -> Mach.operation * Cmm.expression + bool -> Arch.addressing_mode -> Cmm.expression -> + Mach.operation * Cmm.expression (* Can be overridden to deal with special store constant instructions *) method regs_for : Cmm.machtype -> Reg.t array (* Return an array of fresh registers of the given type. diff --git a/asmcomp/sparc/CSE.ml b/asmcomp/sparc/CSE.ml new file mode 100644 index 000000000..c38bab8fe --- /dev/null +++ b/asmcomp/sparc/CSE.ml @@ -0,0 +1,31 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for Sparc *) + +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic (* as super *) + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 4095n && n >= -4096n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/sparc/emit.mlp b/asmcomp/sparc/emit.mlp index 2793776ff..877a3d52a 100644 --- a/asmcomp/sparc/emit.mlp +++ b/asmcomp/sparc/emit.mlp @@ -375,7 +375,7 @@ let rec emit_instr i dslot = | _ -> "ld" in emit_load loadinstr addr i.arg dest end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> let src = i.arg.(0) in begin match chunk with Double_u -> @@ -612,7 +612,7 @@ let is_one_instr i = | Iconst_int n | Iconst_blockheader n -> is_native_immediate n | Istackoffset _ -> true | Iload(_, Iindexed n) -> i.res.(0).typ <> Float && is_immediate n - | Istore(_, Iindexed n) -> i.arg.(0).typ <> Float && is_immediate n + | Istore(_, Iindexed n, _) -> i.arg.(0).typ <> Float && is_immediate n | Iintop(op) -> is_one_instr_op op | Iintop_imm(op, _) -> is_one_instr_op op | Iaddf | Isubf | Imulf | Idivf -> true diff --git a/driver/main_args.ml b/driver/main_args.ml index 7d1c40256..f72a08fff 100644 --- a/driver/main_args.ml +++ b/driver/main_args.ml @@ -389,6 +389,10 @@ let mk_dcombine f = "-dcombine", Arg.Unit f, " (undocumented)" ;; +let mk_dcse f = + "-dcse", Arg.Unit f, " (undocumented)" +;; + let mk_dlive f = "-dlive", Arg.Unit f, " (undocumented)" ;; @@ -599,6 +603,7 @@ module type Optcomp_options = sig val _dcmm : unit -> unit val _dsel : unit -> unit val _dcombine : unit -> unit + val _dcse : unit -> unit val _dlive : unit -> unit val _dspill : unit -> unit val _dsplit : unit -> unit @@ -651,6 +656,7 @@ module type Opttop_options = sig val _dcmm : unit -> unit val _dsel : unit -> unit val _dcombine : unit -> unit + val _dcse : unit -> unit val _dlive : unit -> unit val _dspill : unit -> unit val _dsplit : unit -> unit @@ -848,6 +854,7 @@ struct mk_dcmm F._dcmm; mk_dsel F._dsel; mk_dcombine F._dcombine; + mk_dcse F._dcse; mk_dlive F._dlive; mk_dspill F._dspill; mk_dsplit F._dsplit; @@ -900,6 +907,7 @@ module Make_opttop_options (F : Opttop_options) = struct mk_dcmm F._dcmm; mk_dsel F._dsel; mk_dcombine F._dcombine; + mk_dcse F._dcse; mk_dlive F._dlive; mk_dspill F._dspill; mk_dsplit F._dsplit; diff --git a/driver/main_args.mli b/driver/main_args.mli index 6ebf95c9a..7d957d009 100644 --- a/driver/main_args.mli +++ b/driver/main_args.mli @@ -177,6 +177,7 @@ module type Optcomp_options = sig val _dcmm : unit -> unit val _dsel : unit -> unit val _dcombine : unit -> unit + val _dcse : unit -> unit val _dlive : unit -> unit val _dspill : unit -> unit val _dsplit : unit -> unit @@ -229,6 +230,7 @@ module type Opttop_options = sig val _dcmm : unit -> unit val _dsel : unit -> unit val _dcombine : unit -> unit + val _dcse : unit -> unit val _dlive : unit -> unit val _dspill : unit -> unit val _dsplit : unit -> unit diff --git a/driver/optmain.ml b/driver/optmain.ml index 2fb1a22b3..c8060b6a9 100644 --- a/driver/optmain.ml +++ b/driver/optmain.ml @@ -135,6 +135,7 @@ module Options = Main_args.Make_optcomp_options (struct let _dcmm = set dump_cmm let _dsel = set dump_selection let _dcombine = set dump_combine + let _dcse = set dump_cse let _dlive () = dump_live := true; Printmach.print_live := true let _dspill = set dump_spill let _dsplit = set dump_split diff --git a/tools/ocamloptp.ml b/tools/ocamloptp.ml index 59cb661d9..69c288767 100644 --- a/tools/ocamloptp.ml +++ b/tools/ocamloptp.ml @@ -106,6 +106,7 @@ module Options = Main_args.Make_optcomp_options (struct let _dcmm = option "-dcmm" let _dsel = option "-dsel" let _dcombine = option "-dcombine" + let _dcse = option "-dcse" let _dlive = option "-dlive" let _dspill = option "-dspill" let _dsplit = option "-dsplit" diff --git a/utils/clflags.ml b/utils/clflags.ml index d0484b772..90b8a948f 100644 --- a/utils/clflags.ml +++ b/utils/clflags.ml @@ -73,6 +73,7 @@ let optimize_for_speed = ref true (* -compact *) and dump_cmm = ref false (* -dcmm *) let dump_selection = ref false (* -dsel *) +let dump_cse = ref false (* -dcse *) let dump_live = ref false (* -dlive *) let dump_spill = ref false (* -dspill *) let dump_split = ref false (* -dsplit *) diff --git a/utils/clflags.mli b/utils/clflags.mli index 02c378d23..489ac4d7b 100644 --- a/utils/clflags.mli +++ b/utils/clflags.mli @@ -68,6 +68,7 @@ val keep_asm_file : bool ref val optimize_for_speed : bool ref val dump_cmm : bool ref val dump_selection : bool ref +val dump_cse : bool ref val dump_live : bool ref val dump_spill : bool ref val dump_split : bool ref |