New back-end optimization pass: common subexpression elimination (CSE).

(Reuses results of previous computations instead of recomputing them.) (Cherry-picked from branch backend-optim.) Tested on amd64/linux and i386/linux. Other back-ends compile (after assorted updates) but are untested. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14688 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
author: Xavier Leroy <xavier.leroy@inria.fr> 2014-04-26 10:40:22 +0000
committer: Xavier Leroy <xavier.leroy@inria.fr> 2014-04-26 10:40:22 +0000
commit: 558f40e3446854913d5ce011441c4b10da03f27e (patch)
tree: 84c78a2d3098937813daae9bd75d328dc0669840
parent: 95d98cd9782c0577b0c7290f6535b29e7bd4cd41 (diff)
40 files changed, 660 insertions, 82 deletions
diff --git a/.depend b/.depend
index 9162031fa..1d36a9c89 100644
--- a/.depend
+++ b/.depend
@@ -576,6 +576,7 @@ bytecomp/typeopt.cmo : typing/types.cmi typing/typedtree.cmi \
 bytecomp/typeopt.cmx : typing/types.cmx typing/typedtree.cmx \
     typing/predef.cmx typing/path.cmx bytecomp/lambda.cmx typing/ident.cmx \
     typing/env.cmx typing/ctype.cmx bytecomp/typeopt.cmi
+asmcomp/CSEgen.cmi : asmcomp/mach.cmi
 asmcomp/asmgen.cmi : bytecomp/lambda.cmi asmcomp/cmm.cmi
 asmcomp/asmlibrarian.cmi :
 asmcomp/asmlink.cmi : asmcomp/cmx_format.cmi
@@ -618,6 +619,10 @@ asmcomp/selection.cmi : asmcomp/mach.cmi asmcomp/cmm.cmi
 asmcomp/spill.cmi : asmcomp/mach.cmi
 asmcomp/split.cmi : asmcomp/mach.cmi
 asmcomp/strmatch.cmi : asmcomp/cmm.cmi
+asmcomp/CSE.cmo : asmcomp/mach.cmi asmcomp/CSEgen.cmi asmcomp/arch.cmo
+asmcomp/CSE.cmx : asmcomp/mach.cmx asmcomp/CSEgen.cmx asmcomp/arch.cmx
+asmcomp/CSEgen.cmo : asmcomp/reg.cmi asmcomp/mach.cmi asmcomp/CSEgen.cmi
+asmcomp/CSEgen.cmx : asmcomp/reg.cmx asmcomp/mach.cmx asmcomp/CSEgen.cmi
 asmcomp/arch.cmo :
 asmcomp/arch.cmx :
 asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
@@ -629,7 +634,7 @@ asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
     asmcomp/emitaux.cmi asmcomp/emit.cmi asmcomp/deadcode.cmi \
     utils/config.cmi asmcomp/compilenv.cmi asmcomp/comballoc.cmi \
     asmcomp/coloring.cmi asmcomp/cmmgen.cmi asmcomp/cmm.cmi \
-    asmcomp/closure.cmi utils/clflags.cmi asmcomp/asmgen.cmi
+    asmcomp/closure.cmi utils/clflags.cmi asmcomp/CSE.cmo asmcomp/asmgen.cmi
 asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
     asmcomp/spill.cmx asmcomp/selection.cmx asmcomp/scheduling.cmx \
     asmcomp/reload.cmx asmcomp/reg.cmx asmcomp/proc.cmx asmcomp/printmach.cmx \
@@ -639,7 +644,7 @@ asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
     asmcomp/emitaux.cmx asmcomp/emit.cmx asmcomp/deadcode.cmx \
     utils/config.cmx asmcomp/compilenv.cmx asmcomp/comballoc.cmx \
     asmcomp/coloring.cmx asmcomp/cmmgen.cmx asmcomp/cmm.cmx \
-    asmcomp/closure.cmx utils/clflags.cmx asmcomp/asmgen.cmi
+    asmcomp/closure.cmx utils/clflags.cmx asmcomp/CSE.cmx asmcomp/asmgen.cmi
 asmcomp/asmlibrarian.cmo : utils/misc.cmi parsing/location.cmi \
     utils/config.cmi asmcomp/compilenv.cmi asmcomp/cmx_format.cmi \
     utils/clflags.cmi asmcomp/clambda.cmi utils/ccomp.cmi asmcomp/asmlink.cmi \
diff --git a/Changes b/Changes
index 3f32d5dee..0e4c42009 100644
--- a/Changes
+++ b/Changes
@@ -39,6 +39,8 @@ Compilers:
   int32/int64/nativeint arithmetic.  Constant propagation for floats
   can be turned off with option -no-float-const-prop, for codes that
   change FP rounding modes at run-time.
+- New back-end optimization pass: common subexpression elimination (CSE).
+  (Reuses results of previous computations instead of recomputing them.)
 - New back-end optimization pass: dead code elimination.
   (Removes arithmetic and load instructions whose results are unused.)
 - PR#6269 Optimization of string matching (patch by Benoit Vaugon
diff --git a/Makefile b/Makefile
index 594c650e7..877df08ef 100644
--- a/Makefile
+++ b/Makefile
@@ -84,7 +84,9 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
   asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \
   asmcomp/closure.cmo asmcomp/strmatch.cmo asmcomp/cmmgen.cmo \
   asmcomp/printmach.cmo asmcomp/selectgen.cmo asmcomp/selection.cmo \
-  asmcomp/comballoc.cmo asmcomp/liveness.cmo \
+  asmcomp/comballoc.cmo \
+  asmcomp/CSEgen.cmo asmcomp/CSE.cmo \
+  asmcomp/liveness.cmo \
   asmcomp/spill.cmo asmcomp/split.cmo \
   asmcomp/interf.cmo asmcomp/coloring.cmo \
   asmcomp/reloadgen.cmo asmcomp/reload.cmo \
@@ -589,6 +591,14 @@ partialclean::
 
 beforedepend:: asmcomp/selection.ml
 
+asmcomp/CSE.ml: asmcomp/$(ARCH)/CSE.ml
+	ln -s $(ARCH)/CSE.ml asmcomp/CSE.ml
+
+partialclean::
+	rm -f asmcomp/CSE.ml
+
+beforedepend:: asmcomp/CSE.ml
+
 asmcomp/reload.ml: asmcomp/$(ARCH)/reload.ml
 	ln -s $(ARCH)/reload.ml asmcomp/reload.ml
 
diff --git a/asmcomp/CSEgen.ml b/asmcomp/CSEgen.ml
new file mode 100644
index 000000000..1cbef266b
--- /dev/null
+++ b/asmcomp/CSEgen.ml
@@ -0,0 +1,258 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Common subexpression elimination by value numbering over extended
+   basic blocks. *)
+
+open Mach
+
+type valnum = int
+
+(* We maintain sets of equations of the form
+       valnums = operation(valnums)
+   plus a mapping from registers to value numbers.  *)
+
+type rhs = operation * valnum array
+
+module Equations =
+  Map.Make(struct type t = rhs let compare = Pervasives.compare end)
+
+type numbering =
+  { num_next: int;                      (* next fresh value number *)
+    num_eqs: valnum array Equations.t;  (* mapping rhs -> valnums *)
+    num_reg: valnum Reg.Map.t }         (* mapping register -> valnum *)
+
+let empty_numbering =
+  { num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty }
+
+(** [valnum_reg n r] returns the value number for the contents of
+  register [r].  If none exists, a fresh value number is returned
+  and associated with register [r].  The possibly updated numbering
+  is also returned.  [valnum_regs] is similar, but for an array of
+  registers. *)
+
+let valnum_reg n r =
+  try
+    (n, Reg.Map.find r n.num_reg)
+  with Not_found ->
+    let v = n.num_next in
+    ({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v)
+
+let valnum_regs n rs =
+  let l = Array.length rs in
+  let vs = Array.make l 0 in
+  let n = ref n in
+  for i = 0 to l-1 do
+    let (ni, vi) = valnum_reg !n rs.(i) in
+    vs.(i) <- vi;
+    n := ni
+  done;
+  (!n, vs)
+
+(* Look up the set of equations for an equation with the given rhs.
+   Return [Some res] if there is one, where [res] is the lhs. *)
+
+let find_equation n rhs =
+  try
+    Some(Equations.find rhs n.num_eqs)
+  with Not_found ->
+    None
+
+(* Find a set of registers containing the given value numbers. *)
+
+let find_regs_containing n vs =
+  match Array.length vs with
+  | 0 -> Some [||]
+  | 1 -> let v = vs.(0) in
+         Reg.Map.fold (fun r v' res -> if v' = v then Some [|r|] else res)
+                      n.num_reg None
+  | _ -> assert false
+
+(* Associate the given value numbers to the given result registers,
+   without adding new equations. *)
+
+let set_known_regs n rs vs =
+  match Array.length rs with
+  | 0 -> n
+  | 1 -> { n with num_reg = Reg.Map.add rs.(0) vs.(0) n.num_reg }
+  | _ -> assert false
+
+(* Record the effect of a move: no new equations, but the result reg
+   maps to the same value number as the argument reg. *)
+
+let set_move n src dst =
+  let (n1, v) = valnum_reg n src in
+  { n1 with num_reg = Reg.Map.add dst v n1.num_reg }
+
+(* Record the equation [fresh valnums = rhs] and associate the given
+   result registers [rs] to [fresh valnums]. *)
+
+let set_fresh_regs n rs rhs =
+  match Array.length rs with
+  | 0 -> { n with num_eqs = Equations.add rhs [||] n.num_eqs }
+  | 1 -> let v = n.num_next in
+         { num_next = v + 1;
+           num_eqs = Equations.add rhs [|v|] n.num_eqs;
+           num_reg = Reg.Map.add rs.(0) v n.num_reg }
+  | _ -> assert false
+
+(* Forget everything we know about the given result registers,
+   which are receiving unpredictable values at run-time. *)
+
+let set_unknown_regs n rs =
+  { n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg }
+
+(* Keep only the equations satisfying the given predicate. *)
+
+let filter_equations pred n =
+  { n with num_eqs = Equations.filter (fun (op,_) res -> pred op) n.num_eqs }
+
+(* Prepend a reg-reg move *)
+
+let insert_move srcs dsts i =
+  match Array.length srcs with
+  | 0 -> i
+  | 1 -> instr_cons (Iop Imove) srcs dsts i
+  | _ -> assert false
+
+(* Classification of operations *)
+
+type op_class =
+  | Op_pure     (* pure, produce one result *)
+  | Op_checkbound     (* checkbound-style: no result, can raise an exn *)
+  | Op_load           (* memory load *)
+  | Op_store of bool  (* memory store, false = init, true = assign *)
+  | Op_other          (* anything else that does not store in memory *)
+
+class cse_generic = object (self)
+
+(* Default classification of operations.  Can be overriden in
+   processor-specific files to classify specific operations better. *)
+
+method class_of_operation op =
+  match op with
+  | Imove | Ispill | Ireload -> assert false   (* treated specially *)
+  | Iconst_int _ | Iconst_float _ | Iconst_symbol _
+  | Iconst_blockheader _ -> Op_pure
+  | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+  | Iextcall _ -> assert false                 (* treated specially *)
+  | Istackoffset _ -> Op_other
+  | Iload(_,_) -> Op_load
+  | Istore(_,_,asg) -> Op_store asg
+  | Ialloc _ -> Op_other
+  | Iintop(Icheckbound) -> Op_checkbound
+  | Iintop _ -> Op_pure
+  | Iintop_imm(Icheckbound, _) -> Op_checkbound
+  | Iintop_imm(_, _) -> Op_pure
+  | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
+  | Ifloatofint | Iintoffloat -> Op_pure
+  | Ispecific _ -> Op_other
+
+(* Operations that are so cheap that it isn't worth factoring them. *)
+
+method is_cheap_operation op =
+  match op with
+  | Iconst_int _ | Iconst_blockheader _ -> true
+  | _ -> false
+
+(* Forget all equations involving memory loads.  Performed after a
+   non-initializing store *)
+
+method private kill_loads n =
+  filter_equations (fun o -> self#class_of_operation o <> Op_load) n
+
+(* Keep only equations involving checkbounds, and forget register values.
+   Performed across a call. *)
+
+method private keep_checkbounds n =
+  filter_equations (fun o -> self#class_of_operation o = Op_checkbound)
+                   {n with num_reg = Reg.Map.empty }
+
+(* Perform CSE on the given instruction [i] and its successors.
+   [n] is the value numbering current at the beginning of [i]. *)
+
+method private cse n i =
+  match i.desc with
+  | Iend | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _)
+  | Iexit _ | Iraise _ ->
+      i
+  | Iop (Imove | Ispill | Ireload) ->
+      (* For moves, we associate the same value number to the result reg
+         as to the argument reg. *)
+      let n1 = set_move n i.arg.(0) i.res.(0) in
+      {i with next = self#cse n1 i.next}
+  | Iop (Icall_ind | Icall_imm _ | Iextcall _) ->
+      (* We don't perform CSE across function calls, as it increases
+         register pressure too much.  We do remember the checkbound
+         instructions already performed, though, since their reuse
+         cannot increase register pressure. *)
+      let n1 = self#keep_checkbounds n in
+      {i with next = self#cse n1 i.next}
+  | Iop op ->
+      begin match self#class_of_operation op with
+      | Op_pure | Op_checkbound | Op_load ->
+          assert (Array.length i.res <= 1);
+          let (n1, varg) = valnum_regs n i.arg in
+          begin match find_equation n1 (op, varg) with
+          | Some vres ->
+              (* This operation was computed earlier. *)
+              let n2 = set_known_regs n1 i.res vres in            
+              begin match find_regs_containing n1 vres with
+              | Some res when not (self#is_cheap_operation op) ->
+                  (* We can replace res <- op args with r <- move res.
+                     If the operation is very cheap to compute, e.g.
+                     an integer constant, don't bother. *)
+                  insert_move res i.res (self#cse n2 i.next)                
+              | _ ->
+                  {i with next = self#cse n2 i.next}
+              end
+          | None ->
+              (* This operation produces a result we haven't seen earlier. *)
+              let n2 = set_fresh_regs n1 i.res (op, varg) in
+              {i with next = self#cse n2 i.next}
+          end
+      | Op_store false | Op_other ->
+          (* An initializing store or an "other" operation do not invalidate
+             any equations, but we do not know anything about the results. *)
+          let n1 = set_unknown_regs n i.res in
+          {i with next = self#cse n1 i.next}
+      | Op_store true ->
+          (* A non-initializing store: it can invalidate
+             anything we know about prior loads. *)
+          let n1 = set_unknown_regs (self#kill_loads n) i.res in
+          {i with next = self#cse n1 i.next}
+      end        
+  (* For control structures, we set the numbering to empty at every
+     join point, but propagate the current numbering across fork points. *)
+  | Iifthenelse(test, ifso, ifnot) ->
+      {i with desc = Iifthenelse(test, self#cse n ifso, self#cse n ifnot);
+              next = self#cse empty_numbering i.next}
+  | Iswitch(index, cases) ->
+      {i with desc = Iswitch(index, Array.map (self#cse n) cases);
+              next = self#cse empty_numbering i.next}
+  | Iloop(body) ->
+      {i with desc = Iloop(self#cse empty_numbering body);
+              next = self#cse empty_numbering i.next}
+  | Icatch(nfail, body, handler) ->
+      {i with desc = Icatch(nfail, self#cse n body, self#cse empty_numbering handler);
+              next = self#cse empty_numbering i.next}
+  | Itrywith(body, handler) ->
+      {i with desc = Itrywith(self#cse n body, self#cse empty_numbering handler);
+              next = self#cse empty_numbering i.next}
+
+method fundecl f =
+  {f with fun_body = self#cse empty_numbering f.fun_body}
+
+end
+
+
+
diff --git a/asmcomp/CSEgen.mli b/asmcomp/CSEgen.mli
new file mode 100644
index 000000000..c19855eca
--- /dev/null
+++ b/asmcomp/CSEgen.mli
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Common subexpression elimination by value numbering over extended
+   basic blocks. *)
+
+type op_class =
+  | Op_pure     (* pure, produce one result *)
+  | Op_checkbound     (* checkbound-style: no result, can raise an exn *)
+  | Op_load           (* memory load *)
+  | Op_store of bool  (* memory store, false = init, true = assign *)
+  | Op_other          (* anything else that does not store in memory *)
+
+class cse_generic : object
+  (* The following methods can be overriden to handle processor-specific
+     operations. *)
+
+  method class_of_operation: Mach.operation -> op_class
+
+  method is_cheap_operation: Mach.operation -> bool
+    (* Operations that are so cheap that it isn't worth factoring them. *)
+
+  (* The following method is the entry point and should not be overridden *)
+  method fundecl: Mach.fundecl -> Mach.fundecl
+
+end
+
+
+
diff --git a/asmcomp/amd64/CSE.ml b/asmcomp/amd64/CSE.ml
new file mode 100644
index 000000000..63ef08853
--- /dev/null
+++ b/asmcomp/amd64/CSE.ml
@@ -0,0 +1,36 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for the AMD64 *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  | Ispecific(Ilea _) -> Op_pure
+  | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg
+  | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg
+  | Ispecific(Ioffset_loc(_, _)) -> Op_store true
+  | Ispecific(Ifloatarithmem _) -> Op_load
+  | _ -> super#class_of_operation op
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
+
diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml
index b0a5ffb8b..3741dd74b 100644
--- a/asmcomp/amd64/arch.ml
+++ b/asmcomp/amd64/arch.ml
@@ -33,8 +33,8 @@ type addressing_mode =
 
 type specific_operation =
     Ilea of addressing_mode             (* "lea" gives scaled adds *)
-  | Istore_int of nativeint * addressing_mode (* Store an integer constant *)
-  | Istore_symbol of string * addressing_mode (* Store a symbol *)
+  | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *)
+  | Istore_symbol of string * addressing_mode * bool (* Store a symbol *)
   | Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
   | Ifloatarithmem of float_operation * addressing_mode
                                        (* Float arith operation with memory *)
@@ -101,10 +101,14 @@ let print_addressing printreg addr ppf arg =
 let print_specific_operation printreg op ppf arg =
   match op with
   | Ilea addr -> print_addressing printreg addr ppf arg
-  | Istore_int(n, addr) ->
-      fprintf ppf "[%a] := %nd" (print_addressing printreg addr) arg n
-  | Istore_symbol(lbl, addr) ->
-      fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
+  | Istore_int(n, addr, is_assign) ->
+      fprintf ppf "[%a] := %nd %s"
+         (print_addressing printreg addr) arg n
+         (if is_assign then "(assign)" else "(init)")
+  | Istore_symbol(lbl, addr, is_assign) ->
+      fprintf ppf "[%a] := \"%s\" %s"
+         (print_addressing printreg addr) arg lbl
+         (if is_assign then "(assign)" else "(init)")
   | Ioffset_loc(n, addr) ->
       fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
   | Isqrtf ->
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index bea7e9331..b576ece98 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -449,7 +449,7 @@ let emit_instr fallthrough i =
           | Double | Double_u ->
             `	movsd	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
         end
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         begin match chunk with
           | Word ->
             `	movq	{emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
@@ -542,9 +542,9 @@ let emit_instr fallthrough i =
         `	cvttsd2siq	{emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
     | Lop(Ispecific(Ilea addr)) ->
         `	leaq	{emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
-    | Lop(Ispecific(Istore_int(n, addr))) ->
+    | Lop(Ispecific(Istore_int(n, addr, _))) ->
         `	movq	${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
-    | Lop(Ispecific(Istore_symbol(s, addr))) ->
+    | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
         assert (not !pic_code && not !Clflags.dlcode);
         `	movq	${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
     | Lop(Ispecific(Ioffset_loc(n, addr))) ->
diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp
index cc158d889..a66f0c93b 100644
--- a/asmcomp/amd64/emit_nt.mlp
+++ b/asmcomp/amd64/emit_nt.mlp
@@ -443,7 +443,7 @@ let emit_instr fallthrough i =
           | Double | Double_u ->
             `	movsd	{emit_reg dest}, REAL8 PTR {emit_addressing addr i.arg 0}\n`
         end
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         begin match chunk with
           | Word ->
             `	mov	QWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
@@ -532,9 +532,9 @@ let emit_instr fallthrough i =
         `	cvttsd2si	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
     | Lop(Ispecific(Ilea addr)) ->
         `	lea	{emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
-    | Lop(Ispecific(Istore_int(n, addr))) ->
+    | Lop(Ispecific(Istore_int(n, addr, _))) ->
         `	mov	QWORD PTR {emit_addressing addr i.arg 0}, {emit_nativeint n}\n`
-    | Lop(Ispecific(Istore_symbol(s, addr))) ->
+    | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
         assert (not !pic_code);
 	add_used_symbol s;
         `	mov	QWORD PTR {emit_addressing addr i.arg 0}, OFFSET {emit_symbol s}\n`
diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml
index 5d448df36..cd06559e1 100644
--- a/asmcomp/amd64/proc.ml
+++ b/asmcomp/amd64/proc.ml
@@ -259,7 +259,7 @@ let destroyed_at_oper = function
   | Iop(Iextcall(_, false)) -> destroyed_at_c_call
   | Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _))
         -> [| rax; rdx |]
-  | Iop(Istore(Single, _)) -> [| rxmm15 |]
+  | Iop(Istore(Single, _, _)) -> [| rxmm15 |]
   | Iop(Ialloc _ | Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _))
         -> [| rax |]
   | Iswitch(_, _) -> [| rax; rdx |]
@@ -290,7 +290,7 @@ let max_register_pressure = function
     if fp then [| 10; 16 |] else [| 11; 16 |]
   | Ialloc _ | Iintop(Icomp _) | Iintop_imm((Icomp _), _) ->
     if fp then [| 11; 16 |] else [| 12; 16 |]
-  | Istore(Single, _) ->
+  | Istore(Single, _, _) ->
     if fp then [| 12; 15 |] else [| 13; 15 |]
   | _ -> if fp then [| 12; 16 |] else [| 13; 16 |]
 
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 5e6afbcab..fa7fe66c0 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -152,20 +152,20 @@ method select_addressing chunk exp =
     | Ascaledadd(e1, e2, scale) ->
         (Iindexed2scaled(scale, d), Ctuple[e1; e2])
 
-method! select_store addr exp =
+method! select_store is_assign addr exp =
   match exp with
     Cconst_int n when self#is_immediate n ->
-      (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+      (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
   | (Cconst_natint n | Cconst_blockheader n) when self#is_immediate_natint n ->
-      (Ispecific(Istore_int(n, addr)), Ctuple [])
+      (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
   | Cconst_pointer n when self#is_immediate n ->
-      (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+      (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
   | Cconst_natpointer n when self#is_immediate_natint n ->
-      (Ispecific(Istore_int(n, addr)), Ctuple [])
+      (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
   | Cconst_symbol s when not (!pic_code || !Clflags.dlcode) ->
-      (Ispecific(Istore_symbol(s, addr)), Ctuple [])
+      (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple [])
   | _ ->
-      super#select_store addr exp
+      super#select_store is_assign addr exp
 
 method! select_operation op args =
   match op with
diff --git a/asmcomp/arm/CSE.ml b/asmcomp/arm/CSE.ml
new file mode 100644
index 000000000..00282f1f5
--- /dev/null
+++ b/asmcomp/arm/CSE.ml
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for ARM *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  | Ispecific(Ishiftcheckbound _) -> Op_checkbound
+  | Ispecific _ -> Op_pure
+  | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+  match op with
+  | Iconst_int n | Iconst_blockheader n -> n <= 255n && n >= 0n
+  | _ -> false
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
+
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
index 18c873de2..61035b85f 100644
--- a/asmcomp/arm/emit.mlp
+++ b/asmcomp/arm/emit.mlp
@@ -283,7 +283,7 @@ let num_literals = ref 0
 
 (* Label a floating-point literal *)
 let float_literal f =
-  let repr = Int64.bits_of_float cst in
+  let repr = Int64.bits_of_float f in
   try
     List.assoc repr !float_literals
   with Not_found ->
@@ -391,8 +391,7 @@ let emit_instr i =
     | Lop(Iconst_int n | Iconst_blockheader n) ->
         emit_intconst i.res.(0) (Nativeint.to_int32 n)
     | Lop(Iconst_float f) when !fpu = Soft ->
-        `	@ {emit_string f}\n`;
-        let bits = Int64.bits_of_float (float_of_string f) in
+        let bits = Int64.bits_of_float f in
         let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32)
         and low_bits = Int64.to_int32 bits in
         if is_immediate low_bits || is_immediate high_bits then begin
@@ -407,7 +406,7 @@ let emit_instr i =
         end
     | Lop(Iconst_float f) when !fpu = VFPv2 ->
         let lbl = float_literal f in
-        `	fldd	{emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`;
+        `	fldd	{emit_reg i.res.(0)}, {emit_label lbl}\n`;
         1
     | Lop(Iconst_float f) ->
         let encode imm =
@@ -426,12 +425,12 @@ let emit_instr i =
               let ex = ((ex + 3) land 0x07) lxor 0x04 in
               Some((sg lsl 7) lor (ex lsl 4) lor mn)
           end in
-        begin match encode (Int64.bits_of_float (float_of_string f)) with
+        begin match encode (Int64.bits_of_float f) with
           None ->
             let lbl = float_literal f in
-            `	fldd	{emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`
+            `	fldd	{emit_reg i.res.(0)}, {emit_label lbl}\n`
         | Some imm8 ->
-            `	fconstd	{emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n`
+            `	fconstd	{emit_reg i.res.(0)}, #{emit_int imm8}\n`
         end; 1
     | Lop(Iconst_symbol s) ->
         emit_load_symbol_addr i.res.(0) s
@@ -509,10 +508,10 @@ let emit_instr i =
           | Double_u -> "fldd"
           | _ (* 32-bit quantities *) -> "ldr" in
         `	{emit_string instr}	{emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
-    | Lop(Istore(Single, addr)) when !fpu >= VFPv2 ->
+    | Lop(Istore(Single, addr, _)) when !fpu >= VFPv2 ->
         `	fcvtsd	s14, {emit_reg i.arg.(0)}\n`;
         `	fsts	s14, {emit_addressing addr i.arg 1}\n`; 2
-    | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
+    | Lop(Istore((Double | Double_u), addr, _)) when !fpu = Soft ->
         (* Use STM or STRD if possible *)
         begin match i.arg.(0), i.arg.(1), addr with
           {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
@@ -526,7 +525,7 @@ let emit_instr i =
             `	str	{emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
             `	str	{emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
         end
-    | Lop(Istore(size, addr)) ->
+    | Lop(Istore(size, addr, _)) ->
         let r = i.arg.(0) in
         let instr =
           match size with
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
index c49e3f087..a5bf3d5c8 100644
--- a/asmcomp/arm/proc.ml
+++ b/asmcomp/arm/proc.ml
@@ -203,7 +203,7 @@ let destroyed_at_oper = function
       [| phys_reg 3; phys_reg 8 |]  (* r3 and r12 destroyed *)
   | Iop(Iintop Imulh) when !arch < ARMv6 ->
       [| phys_reg 8 |]              (* r12 destroyed *)
-  | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
+  | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
       [| phys_reg 107 |]            (* d7 (s14-s15) destroyed *)
   | _ -> [||]
 
@@ -222,7 +222,7 @@ let max_register_pressure = function
   | Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
   | Iconst_symbol _ when !pic_code -> [| 7; 16; 32 |]
   | Iintoffloat | Ifloatofint
-  | Iload(Single, _) | Istore(Single, _) -> [| 9; 15; 31 |]
+  | Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
   | _ -> [| 9; 16; 32 |]
 
 (* Pure operations (without any side effect besides updating their result
@@ -231,7 +231,8 @@ let max_register_pressure = function
 let op_is_pure = function
   | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
   | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
-  | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false
+  | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _)
+  | Ispecific(Ishiftcheckbound _) -> false
   | _ -> true
 
 (* Layout of the stack *)
diff --git a/asmcomp/arm64/CSE.ml b/asmcomp/arm64/CSE.ml
new file mode 100644
index 000000000..359e57eb5
--- /dev/null
+++ b/asmcomp/arm64/CSE.ml
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for ARM64 *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  | Ispecific(Ishiftcheckbound _) -> Op_checkbound
+  | Ispecific _ -> Op_pure
+  | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+  match op with
+  | Iconst_int n | Iconst_blockheader n -> n <= 65535n && n >= 0n
+  | _ -> false
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
+
diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp
index eaab3ef48..2c2454fde 100644
--- a/asmcomp/arm64/emit.mlp
+++ b/asmcomp/arm64/emit.mlp
@@ -328,9 +328,9 @@ let emit_instr i =
     | Lop(Iconst_float f) ->
         let b = Int64.bits_of_float f in
         if b = 0L then
-          `	fmov	{emit_reg i.res.(0)}, xzr	/* {emit_string f} */\n`
+          `	fmov	{emit_reg i.res.(0)}, xzr\n`
         else if is_immediate_float b then
-          `	fmov	{emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b}	/* {emit_string f} */\n`
+          `	fmov	{emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b}\n`
         else begin
           let lbl = float_literal b in
           `	adrp	{emit_reg reg_tmp1}, {emit_label lbl}\n`;
@@ -388,7 +388,7 @@ let emit_instr i =
         | Word | Double | Double_u ->
             `	ldr	{emit_reg dst}, {emit_addressing addr base}\n`
         end
-    | Lop(Istore(size, addr)) ->
+    | Lop(Istore(size, addr, _)) ->
         let src = i.arg.(0) in
         let base =
           match addr with
diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml
index b52c2fd8a..d2cda5c23 100644
--- a/asmcomp/arm64/proc.ml
+++ b/asmcomp/arm64/proc.ml
@@ -177,7 +177,7 @@ let destroyed_at_oper = function
       destroyed_at_c_call
   | Iop(Ialloc _) ->
       [| reg_x15 |]
-  | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
+  | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
       [| reg_d7 |]            (* d7 / s7 destroyed *)
   | _ -> [||]
 
@@ -194,9 +194,19 @@ let max_register_pressure = function
   | Iextcall(_, _) -> [| 10; 8 |]
   | Ialloc _ -> [| 25; 32 |]
   | Iintoffloat | Ifloatofint
-  | Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |]
+  | Iload(Single, _) | Istore(Single, _, _) -> [| 26; 31 |]
   | _ -> [| 26; 32 |]
 
+(* Pure operations (without any side effect besides updating their result
+   registers). *)
+
+let op_is_pure = function
+  | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+  | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
+  | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _)
+  | Ispecific(Ishiftcheckbound _) -> false
+  | _ -> true
+
 (* Layout of the stack *)
 
 let num_stack_slots = [| 0; 0 |]
diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml
index ab71d51a0..311bb029b 100644
--- a/asmcomp/asmgen.ml
+++ b/asmcomp/asmgen.ml
@@ -64,6 +64,8 @@ let compile_fundecl (ppf : formatter) fd_cmm =
   ++ pass_dump_if ppf dump_selection "After instruction selection"
   ++ Comballoc.fundecl
   ++ pass_dump_if ppf dump_combine "After allocation combining"
+  ++ CSE.fundecl
+  ++ pass_dump_if ppf dump_cse "After CSE"
   ++ liveness ppf
   ++ Deadcode.fundecl
   ++ pass_dump_if ppf dump_live "Liveness analysis"
diff --git a/asmcomp/i386/CSE.ml b/asmcomp/i386/CSE.ml
new file mode 100644
index 000000000..3ce456702
--- /dev/null
+++ b/asmcomp/i386/CSE.ml
@@ -0,0 +1,48 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for the i386 *)
+
+open Cmm
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  (* Operations that affect the floating-point stack cannot be factored *)
+  | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
+  | Iintoffloat | Ifloatofint
+  | Iload((Single | Double | Double_u), _) -> Op_other
+  (* Specific ops *)
+  | Ispecific(Ilea _) -> Op_pure
+  | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg
+  | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg
+  | Ispecific(Ioffset_loc(_, _)) -> Op_store true
+  | Ispecific _ -> Op_other
+  | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+  match op with
+  | Iconst_int _ | Iconst_blockheader _ -> true
+  | Iconst_symbol _ -> true
+  | _ -> false
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
+
diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml
index d2f9fd61a..0d2130445 100644
--- a/asmcomp/i386/arch.ml
+++ b/asmcomp/i386/arch.ml
@@ -31,8 +31,8 @@ type addressing_mode =
 
 type specific_operation =
     Ilea of addressing_mode             (* Lea gives scaled adds *)
-  | Istore_int of nativeint * addressing_mode (* Store an integer constant *)
-  | Istore_symbol of string * addressing_mode (* Store a symbol *)
+  | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *)
+  | Istore_symbol of string * addressing_mode * bool (* Store a symbol *)
   | Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
   | Ipush                               (* Push regs on stack *)
   | Ipush_int of nativeint            (* Push an integer constant *)
@@ -105,11 +105,14 @@ let print_addressing printreg addr ppf arg =
 let print_specific_operation printreg op ppf arg =
   match op with
   | Ilea addr -> print_addressing printreg addr ppf arg
-  | Istore_int(n, addr) ->
-      fprintf ppf "[%a] := %s" (print_addressing printreg addr) arg
-                               (Nativeint.to_string n)
-  | Istore_symbol(lbl, addr) ->
-      fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
+  | Istore_int(n, addr, is_assign) ->
+      fprintf ppf "[%a] := %nd %s"
+         (print_addressing printreg addr) arg n
+         (if is_assign then "(assign)" else "(init)")
+  | Istore_symbol(lbl, addr, is_assign) ->
+      fprintf ppf "[%a] := \"%s\" %s"
+         (print_addressing printreg addr) arg lbl
+         (if is_assign then "(assign)" else "(init)")
   | Ioffset_loc(n, addr) ->
       fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
   | Ipush ->
diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp
index 532c43dfe..98df5f958 100644
--- a/asmcomp/i386/emit.mlp
+++ b/asmcomp/i386/emit.mlp
@@ -544,7 +544,7 @@ let emit_instr fallthrough i =
           | Double | Double_u ->
             `	fldl	{emit_addressing addr i.arg 0}\n`
         end
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         begin match chunk with
           | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
             `	movl	{emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
@@ -684,9 +684,9 @@ let emit_instr fallthrough i =
         stack_offset := !stack_offset + 8
     | Lop(Ispecific(Ilea addr)) ->
         `	lea	{emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
-    | Lop(Ispecific(Istore_int(n, addr))) ->
+    | Lop(Ispecific(Istore_int(n, addr, _))) ->
         `	movl	${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
-    | Lop(Ispecific(Istore_symbol(s, addr))) ->
+    | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
         `	movl	${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
     | Lop(Ispecific(Ioffset_loc(n, addr))) ->
         `	addl	${emit_int n}, {emit_addressing addr i.arg 0}\n`
diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp
index 5d030ec07..a9c9db3e4 100644
--- a/asmcomp/i386/emit_nt.mlp
+++ b/asmcomp/i386/emit_nt.mlp
@@ -480,7 +480,7 @@ let emit_instr i =
           | Double | Double_u ->
             `	fld	REAL8 PTR {emit_addressing addr i.arg 0}\n`
         end
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         begin match chunk with
           | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
             `	mov	DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
@@ -618,9 +618,9 @@ let emit_instr i =
         stack_offset := !stack_offset + 8
     | Lop(Ispecific(Ilea addr)) ->
         `	lea	{emit_reg i.res.(0)}, DWORD PTR {emit_addressing addr i.arg 0}\n`
-    | Lop(Ispecific(Istore_int(n, addr))) ->
+    | Lop(Ispecific(Istore_int(n, addr, _))) ->
         `	mov	DWORD PTR {emit_addressing addr i.arg 0},{emit_nativeint n}\n`
-    | Lop(Ispecific(Istore_symbol(s, addr))) ->
+    | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
         add_used_symbol s ;
         `	mov	DWORD PTR {emit_addressing addr i.arg 0},OFFSET {emit_symbol s}\n`
     | Lop(Ispecific(Ioffset_loc(n, addr))) ->
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
index d86f1b282..10d2d40e3 100644
--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@@ -135,7 +135,7 @@ let pseudoregs_for_operation op arg res =
   (* For storing a byte, the argument must be in eax...edx.
      (But for a short, any reg will do!)
      Keep it simple, just force the argument to be in edx. *)
-  | Istore((Byte_unsigned | Byte_signed), addr) ->
+  | Istore((Byte_unsigned | Byte_signed), addr, _) ->
       let newarg = Array.copy arg in
       newarg.(0) <- edx;
       (newarg, res, false)
@@ -178,20 +178,20 @@ method select_addressing chunk exp =
   | (Ascaledadd(e1, e2, scale), d) ->
       (Iindexed2scaled(scale, d), Ctuple[e1; e2])
 
-method! select_store addr exp =
+method! select_store is_assign addr exp =
   match exp with
     Cconst_int n ->
-      (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+      (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
   | (Cconst_natint n | Cconst_blockheader n) ->
-      (Ispecific(Istore_int(n, addr)), Ctuple [])
+      (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
   | Cconst_pointer n ->
-      (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+      (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
   | Cconst_natpointer n ->
-      (Ispecific(Istore_int(n, addr)), Ctuple [])
+      (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
   | Cconst_symbol s ->
-      (Ispecific(Istore_symbol(s, addr)), Ctuple [])
+      (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple [])
   | _ ->
-      super#select_store addr exp
+      super#select_store is_assign addr exp
 
 method! select_operation op args =
   match op with
diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml
index 6b141230f..3a7174763 100644
--- a/asmcomp/mach.ml
+++ b/asmcomp/mach.ml
@@ -46,7 +46,7 @@ type operation =
   | Iextcall of string * bool
   | Istackoffset of int
   | Iload of Cmm.memory_chunk * Arch.addressing_mode
-  | Istore of Cmm.memory_chunk * Arch.addressing_mode
+  | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
   | Ialloc of int
   | Iintop of integer_operation
   | Iintop_imm of integer_operation * int
diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli
index 30643730d..618e5e4ce 100644
--- a/asmcomp/mach.mli
+++ b/asmcomp/mach.mli
@@ -43,10 +43,11 @@ type operation =
   | Icall_imm of string
   | Itailcall_ind
   | Itailcall_imm of string
-  | Iextcall of string * bool
+  | Iextcall of string * bool    (* false = noalloc, true = alloc *)
   | Istackoffset of int
   | Iload of Cmm.memory_chunk * Arch.addressing_mode
-  | Istore of Cmm.memory_chunk * Arch.addressing_mode
+  | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
+                                 (* false = initialization, true = assignment *)
   | Ialloc of int
   | Iintop of integer_operation
   | Iintop_imm of integer_operation * int
diff --git a/asmcomp/power/CSE.ml b/asmcomp/power/CSE.ml
new file mode 100644
index 000000000..50fefa5e3
--- /dev/null
+++ b/asmcomp/power/CSE.ml
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for the PowerPC *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  | Ispecific(Imultaddf | Imultsubf) -> Op_pure
+  | Ispecific(Ialloc_far _) -> Op_other
+  | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+  match op with
+  | Iconst_int n | Iconst_blockheader n -> n <= 32767n && n >= -32768n
+  | _ -> false
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
+
diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp
index f31d632c1..0a26ed147 100644
--- a/asmcomp/power/emit.mlp
+++ b/asmcomp/power/emit.mlp
@@ -333,7 +333,7 @@ let instr_size = function
       if chunk = Byte_signed
       then load_store_size addr + 1
       else load_store_size addr
-  | Lop(Istore(chunk, addr)) -> load_store_size addr
+  | Lop(Istore(chunk, addr, _)) -> load_store_size addr
   | Lop(Ialloc n) -> 4
   | Lop(Ispecific(Ialloc_far n)) -> 5
   | Lop(Iintop Imod) -> 3
@@ -548,7 +548,7 @@ let rec emit_instr i dslot =
         emit_load_store loadinstr addr i.arg 0 i.res.(0);
         if chunk = Byte_signed then
           `	extsb	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         let storeinstr =
           match chunk with
             Byte_unsigned | Byte_signed -> "stb"
diff --git a/asmcomp/power/scheduling.ml b/asmcomp/power/scheduling.ml
index 6e594f028..7adaa2eed 100644
--- a/asmcomp/power/scheduling.ml
+++ b/asmcomp/power/scheduling.ml
@@ -44,7 +44,7 @@ method reload_retaddr_latency = 12
 method oper_issue_cycles = function
     Iconst_float _ | Iconst_symbol _ -> 2
   | Iload(_, Ibased(_, _)) -> 2
-  | Istore(_, Ibased(_, _)) -> 2
+  | Istore(_, Ibased(_, _), _) -> 2
   | Ialloc _ -> 4
   | Iintop(Imod) -> 40 (* assuming full stall *)
   | Iintop(Icomp _) -> 4
diff --git a/asmcomp/printmach.ml b/asmcomp/printmach.ml
index 0c577890d..a39160d28 100644
--- a/asmcomp/printmach.ml
+++ b/asmcomp/printmach.ml
@@ -119,12 +119,13 @@ let operation op arg ppf res =
   | Iload(chunk, addr) ->
       fprintf ppf "%s[%a]"
        (Printcmm.chunk chunk) (Arch.print_addressing reg addr) arg
-  | Istore(chunk, addr) ->
-      fprintf ppf "%s[%a] := %a"
+  | Istore(chunk, addr, is_assign) ->
+      fprintf ppf "%s[%a] := %a %s"
        (Printcmm.chunk chunk)
        (Arch.print_addressing reg addr)
        (Array.sub arg 1 (Array.length arg - 1))
        reg arg.(0)
+       (if is_assign then "(assign)" else "(init)")
   | Ialloc n -> fprintf ppf "alloc %i" n
   | Iintop(op) -> fprintf ppf "%a%s%a" reg arg.(0) (intop op) reg arg.(1)
   | Iintop_imm(op, n) -> fprintf ppf "%a%s%i" reg arg.(0) (intop op) n
diff --git a/asmcomp/schedgen.ml b/asmcomp/schedgen.ml
index e04eacd37..eb91854a5 100644
--- a/asmcomp/schedgen.ml
+++ b/asmcomp/schedgen.ml
@@ -165,7 +165,7 @@ method private instr_in_basic_block instr =
    load or store instructions (e.g. on the I386). *)
 
 method is_store = function
-    Istore(_, _) -> true
+    Istore(_, _, _) -> true
   | _ -> false
 
 method is_load = function
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
index a8f073e53..e30d6fec3 100644
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@@ -209,8 +209,8 @@ method virtual select_addressing :
 
 (* Default instruction selection for stores (of words) *)
 
-method select_store addr arg =
-  (Istore(Word, addr), arg)
+method select_store is_assign addr arg =
+  (Istore(Word, addr, is_assign), arg)
 
 (* call marking methods, documented in selectgen.mli *)
 
@@ -256,10 +256,10 @@ method select_operation op args =
   | (Cstore chunk, [arg1; arg2]) ->
       let (addr, eloc) = self#select_addressing chunk arg1 in
       if chunk = Word then begin
-        let (op, newarg2) = self#select_store addr arg2 in
+        let (op, newarg2) = self#select_store true addr arg2 in
         (op, [newarg2; eloc])
       end else begin
-        (Istore(chunk, addr), [arg2; eloc])
+        (Istore(chunk, addr, true), [arg2; eloc])
         (* Inversion addr/datum in Istore *)
       end
   | (Calloc, _) -> (Ialloc 0, args)
@@ -677,16 +677,16 @@ method emit_stores env data regs_addr =
     ref (Arch.offset_addressing Arch.identity_addressing (-Arch.size_int)) in
   List.iter
     (fun e ->
-      let (op, arg) = self#select_store !a e in
+      let (op, arg) = self#select_store false !a e in
       match self#emit_expr env arg with
         None -> assert false
       | Some regs ->
           match op with
-            Istore(_, _) ->
+            Istore(_, _, _) ->
               for i = 0 to Array.length regs - 1 do
                 let r = regs.(i) in
                 let kind = if r.typ = Float then Double_u else Word in
-                self#insert (Iop(Istore(kind, !a)))
+                self#insert (Iop(Istore(kind, !a, false)))
                             (Array.append [|r|] regs_addr) [||];
                 a := Arch.offset_addressing !a (size_component r.typ)
               done
diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli
index 7012c900c..abc6db5eb 100644
--- a/asmcomp/selectgen.mli
+++ b/asmcomp/selectgen.mli
@@ -35,7 +35,8 @@ class virtual selector_generic : object
   method select_condition : Cmm.expression -> Mach.test * Cmm.expression
     (* Can be overridden to deal with special test instructions *)
   method select_store :
-    Arch.addressing_mode -> Cmm.expression -> Mach.operation * Cmm.expression
+    bool -> Arch.addressing_mode -> Cmm.expression ->
+                                         Mach.operation * Cmm.expression
     (* Can be overridden to deal with special store constant instructions *)
   method regs_for : Cmm.machtype -> Reg.t array
     (* Return an array of fresh registers of the given type.
diff --git a/asmcomp/sparc/CSE.ml b/asmcomp/sparc/CSE.ml
new file mode 100644
index 000000000..c38bab8fe
--- /dev/null
+++ b/asmcomp/sparc/CSE.ml
@@ -0,0 +1,31 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for Sparc *)
+
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic (* as super *)
+
+method! is_cheap_operation op =
+  match op with
+  | Iconst_int n | Iconst_blockheader n -> n <= 4095n && n >= -4096n
+  | _ -> false
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
+
diff --git a/asmcomp/sparc/emit.mlp b/asmcomp/sparc/emit.mlp
index 2793776ff..877a3d52a 100644
--- a/asmcomp/sparc/emit.mlp
+++ b/asmcomp/sparc/emit.mlp
@@ -375,7 +375,7 @@ let rec emit_instr i dslot =
               | _ -> "ld" in
             emit_load loadinstr addr i.arg dest
         end
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         let src = i.arg.(0) in
         begin match chunk with
           Double_u ->
@@ -612,7 +612,7 @@ let is_one_instr i =
       | Iconst_int n | Iconst_blockheader n -> is_native_immediate n
       | Istackoffset _ -> true
       | Iload(_, Iindexed n) -> i.res.(0).typ <> Float && is_immediate n
-      | Istore(_, Iindexed n) -> i.arg.(0).typ <> Float && is_immediate n
+      | Istore(_, Iindexed n, _) -> i.arg.(0).typ <> Float && is_immediate n
       | Iintop(op) -> is_one_instr_op op
       | Iintop_imm(op, _) -> is_one_instr_op op
       | Iaddf | Isubf | Imulf | Idivf -> true
diff --git a/driver/main_args.ml b/driver/main_args.ml
index 7d1c40256..f72a08fff 100644
--- a/driver/main_args.ml
+++ b/driver/main_args.ml
@@ -389,6 +389,10 @@ let mk_dcombine f =
   "-dcombine", Arg.Unit f, " (undocumented)"
 ;;
 
+let mk_dcse f =
+  "-dcse", Arg.Unit f, " (undocumented)"
+;;
+
 let mk_dlive f =
   "-dlive", Arg.Unit f, " (undocumented)"
 ;;
@@ -599,6 +603,7 @@ module type Optcomp_options = sig
   val _dcmm : unit -> unit
   val _dsel : unit -> unit
   val _dcombine : unit -> unit
+  val _dcse : unit -> unit
   val _dlive : unit -> unit
   val _dspill : unit -> unit
   val _dsplit : unit -> unit
@@ -651,6 +656,7 @@ module type Opttop_options = sig
   val _dcmm : unit -> unit
   val _dsel : unit -> unit
   val _dcombine : unit -> unit
+  val _dcse : unit -> unit
   val _dlive : unit -> unit
   val _dspill : unit -> unit
   val _dsplit : unit -> unit
@@ -848,6 +854,7 @@ struct
     mk_dcmm F._dcmm;
     mk_dsel F._dsel;
     mk_dcombine F._dcombine;
+    mk_dcse F._dcse;
     mk_dlive F._dlive;
     mk_dspill F._dspill;
     mk_dsplit F._dsplit;
@@ -900,6 +907,7 @@ module Make_opttop_options (F : Opttop_options) = struct
     mk_dcmm F._dcmm;
     mk_dsel F._dsel;
     mk_dcombine F._dcombine;
+    mk_dcse F._dcse;
     mk_dlive F._dlive;
     mk_dspill F._dspill;
     mk_dsplit F._dsplit;
diff --git a/driver/main_args.mli b/driver/main_args.mli
index 6ebf95c9a..7d957d009 100644
--- a/driver/main_args.mli
+++ b/driver/main_args.mli
@@ -177,6 +177,7 @@ module type Optcomp_options = sig
   val _dcmm : unit -> unit
   val _dsel : unit -> unit
   val _dcombine : unit -> unit
+  val _dcse : unit -> unit
   val _dlive : unit -> unit
   val _dspill : unit -> unit
   val _dsplit : unit -> unit
@@ -229,6 +230,7 @@ module type Opttop_options = sig
   val _dcmm : unit -> unit
   val _dsel : unit -> unit
   val _dcombine : unit -> unit
+  val _dcse : unit -> unit
   val _dlive : unit -> unit
   val _dspill : unit -> unit
   val _dsplit : unit -> unit
diff --git a/driver/optmain.ml b/driver/optmain.ml
index 2fb1a22b3..c8060b6a9 100644
--- a/driver/optmain.ml
+++ b/driver/optmain.ml
@@ -135,6 +135,7 @@ module Options = Main_args.Make_optcomp_options (struct
   let _dcmm = set dump_cmm
   let _dsel = set dump_selection
   let _dcombine = set dump_combine
+  let _dcse = set dump_cse
   let _dlive () = dump_live := true; Printmach.print_live := true
   let _dspill = set dump_spill
   let _dsplit = set dump_split
diff --git a/tools/ocamloptp.ml b/tools/ocamloptp.ml
index 59cb661d9..69c288767 100644
--- a/tools/ocamloptp.ml
+++ b/tools/ocamloptp.ml
@@ -106,6 +106,7 @@ module Options = Main_args.Make_optcomp_options (struct
   let _dcmm = option "-dcmm"
   let _dsel = option "-dsel"
   let _dcombine = option "-dcombine"
+  let _dcse = option "-dcse"
   let _dlive = option "-dlive"
   let _dspill = option "-dspill"
   let _dsplit = option "-dsplit"
diff --git a/utils/clflags.ml b/utils/clflags.ml
index d0484b772..90b8a948f 100644
--- a/utils/clflags.ml
+++ b/utils/clflags.ml
@@ -73,6 +73,7 @@ let optimize_for_speed = ref true       (* -compact *)
 
 and dump_cmm = ref false                (* -dcmm *)
 let dump_selection = ref false          (* -dsel *)
+let dump_cse = ref false                (* -dcse *)
 let dump_live = ref false               (* -dlive *)
 let dump_spill = ref false              (* -dspill *)
 let dump_split = ref false              (* -dsplit *)
diff --git a/utils/clflags.mli b/utils/clflags.mli
index 02c378d23..489ac4d7b 100644
--- a/utils/clflags.mli
+++ b/utils/clflags.mli
@@ -68,6 +68,7 @@ val keep_asm_file : bool ref
 val optimize_for_speed : bool ref
 val dump_cmm : bool ref
 val dump_selection : bool ref
+val dump_cse : bool ref
 val dump_live : bool ref
 val dump_spill : bool ref
 val dump_split : bool ref
author	Xavier Leroy <xavier.leroy@inria.fr>	2014-04-26 10:40:22 +0000
committer	Xavier Leroy <xavier.leroy@inria.fr>	2014-04-26 10:40:22 +0000
commit	558f40e3446854913d5ce011441c4b10da03f27e (patch)
tree	84c78a2d3098937813daae9bd75d328dc0669840
parent	95d98cd9782c0577b0c7290f6535b29e7bd4cd41 (diff)