Reflecting commit 15012 on version/4.02:

PR#6484 and PR#6486: CSE across memory allocations can present the GC with memory roots that are illegal. Plus: lift the previous restriction that all arithmetic ops and loads have at most one result register, this isn't true for ARM with soft floats. Plus: code refactoring and more comments. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@15013 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
author: Xavier Leroy <xavier.leroy@inria.fr> 2014-07-18 14:11:33 +0000
committer: Xavier Leroy <xavier.leroy@inria.fr> 2014-07-18 14:11:33 +0000
commit: 127c288ae598c852688fd3006b9e073c6cd38371 (patch)
tree: c1a34dde3cfc4d9f1e381c4f08bcac71ea61f676
parent: d7f3235c1bc3cea3e0d18b201d06fa937cd39293 (diff)
2 files changed, 133 insertions, 48 deletions
diff --git a/asmcomp/CSEgen.ml b/asmcomp/CSEgen.ml
index e1ab5eb92..228ac83f4 100644
--- a/asmcomp/CSEgen.ml
+++ b/asmcomp/CSEgen.ml
@@ -19,7 +19,7 @@ type valnum = int
 
 (* We maintain sets of equations of the form
        valnums = operation(valnums)
-   plus a mapping from registers to value numbers.  *)
+   plus a mapping from registers to valnums (value numbers). *)
 
 type rhs = operation * valnum array
 
@@ -34,6 +34,30 @@ type numbering =
 let empty_numbering =
   { num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty }
 
+(** Generate a fresh value number [v] and associate it to register [r].
+  Returns a pair [(n',v)] with the updated value numbering [n']. *)
+
+let fresh_valnum_reg n r =
+  let v = n.num_next in
+  ({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v)
+
+(* Same, for a set of registers [rs]. *)
+
+let array_fold_transf (f: numbering -> 'a -> numbering * 'b) n (a: 'a array)
+                      : numbering * 'b array =
+  match Array.length a with
+  | 0 -> (n, [||])
+  | 1 -> let (n', b) = f n a.(0) in (n', [|b|])
+  | l -> let b = Array.make l 0 and n = ref n in
+         for i = 0 to l - 1 do
+           let (n', x) = f !n a.(i) in
+           b.(i) <- x; n := n'
+         done;
+         (!n, b)
+
+let fresh_valnum_regs n rs =
+  array_fold_transf fresh_valnum_reg n rs
+
 (** [valnum_reg n r] returns the value number for the contents of
   register [r].  If none exists, a fresh value number is returned
   and associated with register [r].  The possibly updated numbering
@@ -44,19 +68,10 @@ let valnum_reg n r =
   try
     (n, Reg.Map.find r n.num_reg)
   with Not_found ->
-    let v = n.num_next in
-    ({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v)
+    fresh_valnum_reg n r
 
 let valnum_regs n rs =
-  let l = Array.length rs in
-  let vs = Array.make l 0 in
-  let n = ref n in
-  for i = 0 to l-1 do
-    let (ni, vi) = valnum_reg !n rs.(i) in
-    vs.(i) <- vi;
-    n := ni
-  done;
-  (!n, vs)
+  array_fold_transf valnum_reg n rs
 
 (* Look up the set of equations for an equation with the given rhs.
    Return [Some res] if there is one, where [res] is the lhs. *)
@@ -67,24 +82,51 @@ let find_equation n rhs =
   with Not_found ->
     None
 
+(* Find a register containing the given value number. *)
+
+let find_reg_containing n v =
+  Reg.Map.fold (fun r v' res -> if v' = v then Some r else res)
+               n.num_reg None
+
 (* Find a set of registers containing the given value numbers. *)
 
 let find_regs_containing n vs =
   match Array.length vs with
   | 0 -> Some [||]
-  | 1 -> let v = vs.(0) in
-         Reg.Map.fold (fun r v' res -> if v' = v then Some [|r|] else res)
-                      n.num_reg None
-  | _ -> assert false
+  | 1 -> begin match find_reg_containing n vs.(0) with
+         | None -> None
+         | Some r -> Some [|r|]
+         end
+  | l -> let rs = Array.make l Reg.dummy in
+         begin try
+           for i = 0 to l - 1 do
+             match find_reg_containing n vs.(i) with
+             | None -> raise Exit
+             | Some r -> rs.(i) <- r
+           done;
+           Some rs
+         with Exit ->
+           None
+         end
+
+(* Associate the given value number to the given result register,
+   without adding new equations. *)
+
+let set_known_reg n r v =
+  { n with num_reg = Reg.Map.add r v n.num_reg }
 
 (* Associate the given value numbers to the given result registers,
    without adding new equations. *)
 
+let array_fold2 f n a1 a2 =
+  let l = Array.length a1 in
+  assert (l = Array.length a2);
+  let n = ref n in
+  for i = 0 to l - 1 do n := f !n a1.(i) a2.(i) done;
+  !n
+
 let set_known_regs n rs vs =
-  match Array.length rs with
-  | 0 -> n
-  | 1 -> { n with num_reg = Reg.Map.add rs.(0) vs.(0) n.num_reg }
-  | _ -> assert false
+  array_fold2 set_known_reg n rs vs
 
 (* Record the effect of a move: no new equations, but the result reg
    maps to the same value number as the argument reg. *)
@@ -97,13 +139,8 @@ let set_move n src dst =
    result registers [rs] to [fresh valnums]. *)
 
 let set_fresh_regs n rs rhs =
-  match Array.length rs with
-  | 0 -> { n with num_eqs = Equations.add rhs [||] n.num_eqs }
-  | 1 -> let v = n.num_next in
-         { num_next = v + 1;
-           num_eqs = Equations.add rhs [|v|] n.num_eqs;
-           num_reg = Reg.Map.add rs.(0) v n.num_reg }
-  | _ -> assert false
+  let (n1, vs) = fresh_valnum_regs n rs in
+  { n1 with num_eqs = Equations.add rhs vs n.num_eqs }
 
 (* Forget everything we know about the given result registers,
    which are receiving unpredictable values at run-time. *)
@@ -111,27 +148,40 @@ let set_fresh_regs n rs rhs =
 let set_unknown_regs n rs =
   { n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg }
 
+(* Forget everything we know about hardware registers and stack locations.
+   Used at function calls, since these can change registers arbitrarily. *)
+
+let forget_hard_regs n =
+  { n with num_reg =
+        Reg.Map.filter (fun r v -> r.Reg.loc = Reg.Unknown) n.num_reg }
+
 (* Keep only the equations satisfying the given predicate. *)
 
 let filter_equations pred n =
   { n with num_eqs = Equations.filter (fun (op,_) res -> pred op) n.num_eqs }
 
-(* Prepend a reg-reg move *)
+(* Prepend a set of moves before [i] to assign [srcs] to [dsts].  *)
+
+let insert_single_move i src dst = instr_cons (Iop Imove) [|src|] [|dst|] i
 
 let insert_move srcs dsts i =
   match Array.length srcs with
   | 0 -> i
   | 1 -> instr_cons (Iop Imove) srcs dsts i
-  | _ -> assert false
+  | l -> (* Parallel move: first copy srcs into tmps one by one,
+            then copy tmps into dsts one by one *)
+         let tmps = Reg.createv_like srcs in
+         array_fold2 insert_single_move
+           (array_fold2 insert_single_move i srcs tmps) tmps dsts
 
 (* Classification of operations *)
 
 type op_class =
-  | Op_pure     (* pure, produce one result *)
+  | Op_pure           (* pure arithmetic, produce one or several result *)
   | Op_checkbound     (* checkbound-style: no result, can raise an exn *)
   | Op_load           (* memory load *)
   | Op_store of bool  (* memory store, false = init, true = assign *)
-  | Op_other          (* anything else that does not store in memory *)
+  | Op_other   (* anything else that does not allocate nor store in memory *)
 
 class cse_generic = object (self)
 
@@ -148,7 +198,7 @@ method class_of_operation op =
   | Istackoffset _ -> Op_other
   | Iload(_,_) -> Op_load
   | Istore(_,_,asg) -> Op_store asg
-  | Ialloc _ -> Op_other
+  | Ialloc _ -> assert false                   (* treated specially *)
   | Iintop(Icheckbound) -> Op_checkbound
   | Iintop _ -> Op_pure
   | Iintop_imm(Icheckbound, _) -> Op_checkbound
@@ -177,6 +227,16 @@ method private keep_checkbounds n =
   filter_equations (fun o -> self#class_of_operation o = Op_checkbound)
                    {n with num_reg = Reg.Map.empty }
 
+(* Keep only equations involving checkbounds and loads.
+   Performed across an alloc.  We cannot reuse results of arithmetic operations
+   after an alloc, because some of these results can be derived pointers 
+   (into the heap, but not to the first field of an object) (PR#6484). *)
+
+method private keep_checkbounds_and_loads n =
+  filter_equations
+    (fun o -> let c = self#class_of_operation o in c = Op_checkbound || c = Op_load)
+    n
+
 (* Perform CSE on the given instruction [i] and its successors.
    [n] is the value numbering current at the beginning of [i]. *)
 
@@ -191,22 +251,45 @@ method private cse n i =
       let n1 = set_move n i.arg.(0) i.res.(0) in
       {i with next = self#cse n1 i.next}
   | Iop (Icall_ind | Icall_imm _ | Iextcall _) ->
-      (* We don't perform CSE across function calls, as it increases
-         register pressure too much.  We do remember the checkbound
-         instructions already performed, though, since their reuse
-         cannot increase register pressure. *)
-      let n1 = self#keep_checkbounds n in
+      (* For function calls, we should at least forget:
+         - equations involving memory loads, since the callee can
+           perform arbitrary memory stores;
+         - equations involving arithmetic operations that can
+           produce bad pointers into the heap (see below for Ialloc);
+         - mappings from hardware registers to value numbers,
+           since the callee may not preserve these registers.
+         At any rate, we don't want to perform CSE of arithmetic
+         operations across function calls, because of increased
+         register pressure.  So, we only remember:
+         - the checkbound instructions already performed, though,
+           since their reuse cannot increase register pressure
+           nor trouble the GC;
+         - mappings from unallocated pseudoregisters to value numbers. *)
+      let n1 = forget_hard_regs (self#keep_checkbounds n) in
       {i with next = self#cse n1 i.next}
+  | Iop (Ialloc _) ->
+      (* For allocations, we must avoid extending the live range of a
+         pseudoregister across the allocation if this pseudoreg can
+         contain a value that looks like a pointer into the heap but
+         is not a pointer to the beginning of a Caml object.  PR#6484
+         is an example of such as value (a derived pointer into a
+         block).  So, conservatively, we forget all equations
+         involving arithmetic operations or memory loads, keeping only
+         checkbound equations.  For registers, we do like for an
+         "Op_other" instruction. *)
+       let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
+       let n2 = self#keep_checkbounds n1 in
+       let n3 = set_unknown_regs n2 i.res in
+       {i with next = self#cse n3 i.next}
   | Iop op ->
       begin match self#class_of_operation op with
       | Op_pure | Op_checkbound | Op_load ->
-          assert (Array.length i.res <= 1);
           let (n1, varg) = valnum_regs n i.arg in
           let n2 = set_unknown_regs n1 (Proc.destroyed_at_oper i.desc) in
           begin match find_equation n1 (op, varg) with
           | Some vres ->
               (* This operation was computed earlier. *)
-              (* Is there a register that holds the result computed earlier? *)
+              (* Are there registers that hold the results computed earlier? *)
               begin match find_regs_containing n1 vres with
               | Some res when (not (self#is_cheap_operation op))
                            && (not (Proc.regs_are_volatile res)) ->
@@ -219,7 +302,10 @@ method private cse n i =
                      does not destroy any regs *)
                   insert_move res i.res (self#cse n3 i.next)
               | _ ->
-                  let n3 = set_known_regs n2 i.res vres in            
+                  (* We already computed the operation but lost its
+                     results.  Associate the result registers to
+                     the result valnums of the previous operation. *)
+                  let n3 = set_known_regs n2 i.res vres in
                   {i with next = self#cse n3 i.next}
               end
           | None ->
@@ -234,13 +320,13 @@ method private cse n i =
          let n2 = set_unknown_regs n1 i.res in
          {i with next = self#cse n2 i.next}
       | Op_store true ->
-          (* A non-initializing store: it can invalidate
+          (* A non-initializing store can invalidate
              anything we know about prior loads. *)
          let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
          let n2 = set_unknown_regs n1 i.res in
          let n3 = self#kill_loads n2 in
          {i with next = self#cse n3 i.next}
-      end        
+      end
   (* For control structures, we set the numbering to empty at every
      join point, but propagate the current numbering across fork points. *)
   | Iifthenelse(test, ifso, ifnot) ->
@@ -255,16 +341,15 @@ method private cse n i =
       {i with desc = Iloop(self#cse empty_numbering body);
               next = self#cse empty_numbering i.next}
   | Icatch(nfail, body, handler) ->
-      {i with desc = Icatch(nfail, self#cse n body, self#cse empty_numbering handler);
+      {i with desc = Icatch(nfail, self#cse n body,
+                            self#cse empty_numbering handler);
               next = self#cse empty_numbering i.next}
   | Itrywith(body, handler) ->
-      {i with desc = Itrywith(self#cse n body, self#cse empty_numbering handler);
+      {i with desc = Itrywith(self#cse n body,
+                              self#cse empty_numbering handler);
               next = self#cse empty_numbering i.next}
 
 method fundecl f =
   {f with fun_body = self#cse empty_numbering f.fun_body}
 
 end
-
-
-
diff --git a/asmcomp/CSEgen.mli b/asmcomp/CSEgen.mli
index c19855eca..ffea33891 100644
--- a/asmcomp/CSEgen.mli
+++ b/asmcomp/CSEgen.mli
@@ -18,7 +18,7 @@ type op_class =
   | Op_checkbound     (* checkbound-style: no result, can raise an exn *)
   | Op_load           (* memory load *)
   | Op_store of bool  (* memory store, false = init, true = assign *)
-  | Op_other          (* anything else that does not store in memory *)
+  | Op_other   (* anything else that does not allocate nor store in memory *)
 
 class cse_generic : object
   (* The following methods can be overriden to handle processor-specific
author	Xavier Leroy <xavier.leroy@inria.fr>	2014-07-18 14:11:33 +0000
committer	Xavier Leroy <xavier.leroy@inria.fr>	2014-07-18 14:11:33 +0000
commit	127c288ae598c852688fd3006b9e073c6cd38371 (patch)
tree	c1a34dde3cfc4d9f1e381c4f08bcac71ea61f676
parent	d7f3235c1bc3cea3e0d18b201d06fa937cd39293 (diff)