Ne plus utiliser de registres flottants

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@825 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
author: Xavier Leroy <xavier.leroy@inria.fr> 1996-05-16 19:39:26 +0000
committer: Xavier Leroy <xavier.leroy@inria.fr> 1996-05-16 19:39:26 +0000
commit: 2a309d688f7d6e673563ab11c06fe4e294cc4bfa (patch)
tree: 42de46da0f07425f1f041cad8755eb2401324767
parent: 064465320e18f583d63fe6c008ab9b8c4c22b0ae (diff)
8 files changed, 347 insertions, 514 deletions
diff --git a/asmcomp/coloring.ml b/asmcomp/coloring.ml
index 74cfbb632..5ba3dc50c 100644
--- a/asmcomp/coloring.ml
+++ b/asmcomp/coloring.ml
@@ -46,15 +46,22 @@ let constrained = ref Reg.Set.empty
 
 let find_degree reg =
   if reg.spill then () else begin
-    let deg = ref 0 in
     let cl = Proc.register_class reg in
-    List.iter
-      (fun r -> if not r.spill & Proc.register_class r = cl then incr deg)
-      reg.interf;
-    reg.degree <- !deg;
-    if !deg >= Proc.num_available_registers.(cl)
-    then constrained := Reg.Set.add reg !constrained
-    else unconstrained := Reg.Set.add reg !unconstrained
+    let avail_regs = Proc.num_available_registers.(cl) in
+    if avail_regs = 0 then
+      (* Don't bother computing the degree if there are no regs 
+         in this class *)
+      unconstrained := Reg.Set.add reg !unconstrained
+    else begin
+      let deg = ref 0 in
+      List.iter
+        (fun r -> if not r.spill & Proc.register_class r = cl then incr deg)
+        reg.interf;
+      reg.degree <- !deg;
+      if !deg >= avail_regs
+      then constrained := Reg.Set.add reg !constrained
+      else unconstrained := Reg.Set.add reg !unconstrained
+    end
   end
 
 (* Remove a register from the interference graph *)
@@ -137,60 +144,62 @@ let assign_location reg =
   let num_regs = Proc.num_available_registers.(cl) in
   let last_reg = first_reg + num_regs in
   let score = Array.create num_regs 0 in
-  (* Favor the registers that have been assigned to pseudoregs for which
-     we have a preference. If these pseudoregs have not been assigned
-     already, avoid the registers with which they conflict. *)
-  iter_preferred
-    (fun r w ->
-      match r.loc with
-        Reg n -> if n >= first_reg & n < last_reg then
-                   score.(n - first_reg) <- score.(n - first_reg) + w
-      | Unknown ->
-          List.iter
-            (fun neighbour ->
-              match neighbour.loc with
-                Reg n -> if n >= first_reg & n < last_reg then
-                         score.(n - first_reg) <- score.(n - first_reg) - w
-              | _ -> ())
-            r.interf
-      | _ -> ())
-    reg;
-  List.iter
-    (fun neighbour ->
-      (* Prohibit the registers that have been assigned
-         to our neighbours *)
-      begin match neighbour.loc with
-        Reg n -> if n >= first_reg & n < last_reg then
-                   score.(n - first_reg) <- (-1000000)
-      | _ -> ()
-      end;
-      (* Avoid the registers that have been assigned to pseudoregs
-         for which our neighbours have a preference *)
-      iter_preferred
-        (fun r w ->
-          match r.loc with
-            Reg n -> if n >= first_reg & n < last_reg then
-                       score.(n - first_reg) <- score.(n - first_reg) - (w - 1)
-                     (* w-1 to break the symmetry when two conflicting regs
-                        have the same preference for a third reg. *)
-          | _ -> ())
-        neighbour)
-    reg.interf;
-  (* Pick the register with the best score *)
   let best_score = ref (-1000000) and best_reg = ref (-1) in
   let start = start_register.(cl) in
-  for n = start to num_regs - 1 do
-    if score.(n) > !best_score then begin
-      best_score := score.(n);
-      best_reg := n
-    end
-  done;
-  for n = 0 to start - 1 do
-    if score.(n) > !best_score then begin
-      best_score := score.(n);
-      best_reg := n
-    end
-  done;
+  if num_regs > 0 then begin
+    (* Favor the registers that have been assigned to pseudoregs for which
+       we have a preference. If these pseudoregs have not been assigned
+       already, avoid the registers with which they conflict. *)
+    iter_preferred
+      (fun r w ->
+        match r.loc with
+          Reg n -> if n >= first_reg & n < last_reg then
+                     score.(n - first_reg) <- score.(n - first_reg) + w
+        | Unknown ->
+            List.iter
+              (fun neighbour ->
+                match neighbour.loc with
+                  Reg n -> if n >= first_reg & n < last_reg then
+                           score.(n - first_reg) <- score.(n - first_reg) - w
+                | _ -> ())
+              r.interf
+        | _ -> ())
+      reg;
+    List.iter
+      (fun neighbour ->
+        (* Prohibit the registers that have been assigned
+           to our neighbours *)
+        begin match neighbour.loc with
+          Reg n -> if n >= first_reg & n < last_reg then
+                     score.(n - first_reg) <- (-1000000)
+        | _ -> ()
+        end;
+        (* Avoid the registers that have been assigned to pseudoregs
+           for which our neighbours have a preference *)
+        iter_preferred
+          (fun r w ->
+            match r.loc with
+              Reg n -> if n >= first_reg & n < last_reg then
+                         score.(n - first_reg) <- score.(n - first_reg) - (w - 1)
+                       (* w-1 to break the symmetry when two conflicting regs
+                          have the same preference for a third reg. *)
+            | _ -> ())
+          neighbour)
+      reg.interf;
+    (* Pick the register with the best score *)
+    for n = start to num_regs - 1 do
+      if score.(n) > !best_score then begin
+        best_score := score.(n);
+        best_reg := n
+      end
+    done;
+    for n = 0 to start - 1 do
+      if score.(n) > !best_score then begin
+        best_score := score.(n);
+        best_reg := n
+      end
+    done
+  end;
   (* Found a register? *)
   if !best_reg >= 0 then begin
     reg.loc <- Reg(first_reg + !best_reg);
diff --git a/asmcomp/emit_i386.mlp b/asmcomp/emit_i386.mlp
index 5ff2ca0d3..a6e78d5f5 100644
--- a/asmcomp/emit_i386.mlp
+++ b/asmcomp/emit_i386.mlp
@@ -71,23 +71,10 @@ let emit_align =
     "linux_elf" -> (fun n -> `	.align	{emit_int n}\n`)
   | _           -> (fun n -> `	.align	{emit_int(Misc.log2 n)}\n`)
   
-(* Track the position of the floating-point stack *)
-
-let fp_offset = ref 0
-
-let push_fp () =
-  incr fp_offset;
-  if !fp_offset > 4 then fatal_error "Emit: float expression too complex"
-
-let pop_fp () =
-  decr fp_offset
-
 (* Output a pseudo-register *)
 
 let emit_reg = function
-    { loc = Reg r; typ = Float } ->
-      emit_string (register_name(r + !fp_offset))
-  | { loc = Reg r } ->
+    { loc = Reg r } ->
       emit_string (register_name r)
   | { loc = Stack s } as r ->
       let ofs = slot_offset s (register_class r) in
@@ -122,17 +109,6 @@ let register_overlap reg arr =
   with Exit ->
     true
 
-(* Check if a set of registers contains a float *)
-
-let contains_floats arr =
-  try
-    for i = 0 to Array.length arr - 1 do
-      if arr.(i).typ = Float then raise Exit
-    done;
-    false
-  with Exit ->
-    true
-
 (* Output an addressing mode *)
 
 let emit_addressing addr r n =
@@ -153,13 +129,6 @@ let emit_addressing addr r n =
       if d <> 0 then emit_int d;
       `({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})`
 
-(* Emit the operand of a floating-point operation *)
-
-let emit_float_operand r =
-  match r.loc with
-    Stack s -> `l	{emit_reg r}`
-  | _       -> `	{emit_reg r}` 
-
 (* Record live pointers at call points *)
 
 type frame_descr =
@@ -229,21 +198,21 @@ let instr_for_intop = function
 let instr_for_floatop = function
     Inegf -> "fchs"
   | Iabsf -> "fabs"
-  | Iaddf -> "fadd"
-  | Isubf -> "fsub"
-  | Imulf -> "fmul"
-  | Idivf -> "fdiv"
-  | Ispecific Isubfrev -> "fsubr"
-  | Ispecific Idivfrev -> "fdivr"
+  | Iaddf -> "faddl"
+  | Isubf -> "fsubl"
+  | Imulf -> "fmull"
+  | Idivf -> "fdivl"
+  | Ispecific Isubfrev -> "fsubrl"
+  | Ispecific Idivfrev -> "fdivrl"
   | _ -> fatal_error "Emit_i386: instr_for_floatop"
 
 let instr_for_floatop_reversed = function
-    Iaddf -> "fadd"
-  | Isubf -> "fsubr"
-  | Imulf -> "fmul"
-  | Idivf -> "fdivr"
-  | Ispecific Isubfrev -> "fsub"
-  | Ispecific Idivfrev -> "fdiv"
+    Iaddf -> "faddl"
+  | Isubf -> "fsubrl"
+  | Imulf -> "fmull"
+  | Idivf -> "fdivrl"
+  | Ispecific Isubfrev -> "fsubl"
+  | Ispecific Idivfrev -> "fdivl"
   | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed"
 
 let instr_for_floatop_pop = function
@@ -295,27 +264,23 @@ let range_check_trap = ref 0
 
 let float_constants = ref ([] : (int * string) list)
 
+let tos = phys_reg 100
+
 let emit_instr i =
     match i.desc with
       Lend -> ()
     | Lop(Imove | Ispill | Ireload) ->
-        if i.arg.(0).loc <> i.res.(0).loc then begin
-          match i.arg.(0).typ with
-            Int | Addr ->
-              `	movl	{emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
-          | Float ->
-              begin match i.arg.(0).loc with
-                Reg 100 ->              (* top of FP stack *)
-                  `	fstpl	{emit_reg i.res.(0)}\n`;
-                  pop_fp()
-              | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
-                  `	fstl	{emit_reg i.res.(0)}\n`
-              | _ ->
-                  `	fldl	{emit_reg i.arg.(0)}\n`;
-                  push_fp();
-                  `	fstpl	{emit_reg i.res.(0)}\n`;
-                  pop_fp()
-              end
+        let src = i.arg.(0) and dst = i.res.(0) in
+        if src.loc <> dst.loc then begin
+          if src.typ = Float then
+            if src = tos then
+              `	fstpl	{emit_reg dst}\n`
+            else begin
+              `	fldl	{emit_reg src}\n`;
+              `	fstpl	{emit_reg dst}\n`
+            end
+          else
+            `	movl	{emit_reg src}, {emit_reg dst}\n`
         end
     | Lop(Iconst_int 0) ->
         begin match i.res.(0).loc with
@@ -334,8 +299,7 @@ let emit_instr i =
           let lbl = new_label() in
           float_constants := (lbl, s) :: !float_constants;
           `	fldl	{emit_label lbl}\n`
-        end;
-        push_fp()
+        end
     | Lop(Iconst_symbol s) ->
         `	movl	${emit_symbol s}, {emit_reg i.res.(0)}\n`
     | Lop(Icall_ind) ->
@@ -360,12 +324,6 @@ let emit_instr i =
           `	call	{emit_symbol "caml_c_call"}\n`;
           record_frame i.live
         end else begin
-          if contains_floats i.arg or contains_floats i.res then begin
-            `	ffree	%st(0)\n`;
-            `	ffree	%st(1)\n`;
-            `	ffree	%st(2)\n`;
-            `	ffree	%st(3)\n`
-          end;
           `	call	{emit_symbol s}\n`
         end
     | Lop(Istackoffset n) ->
@@ -396,23 +354,18 @@ let emit_instr i =
                 `	movswl	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
             end
         | Float ->
-            `	fldl	{emit_addressing addr i.arg 0}\n`;
-            push_fp()
+            `	fldl	{emit_addressing addr i.arg 0}\n`
         end
     | Lop(Istore(Word, addr)) ->
         begin match i.arg.(0).typ with
           Int | Addr ->
             `	movl	{emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
         | Float ->
-            begin match i.arg.(0).loc with
-              Reg 100 ->                (* top of FP stack *)
-                `	fstpl	{emit_addressing addr i.arg 1}\n`;
-                pop_fp()
-            | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
-                `	fstl	{emit_addressing addr i.arg 1}\n`
-            | _ ->
-                `	fldl	{emit_reg i.arg.(0)}\n`;
-                `	fstpl	{emit_addressing addr i.arg 1}\n`
+            if i.arg.(0) = tos then
+              `	fstpl	{emit_addressing addr i.arg 1}\n`
+            else begin
+              `	fldl	{emit_reg i.arg.(0)}\n`;
+              `	fstpl	{emit_addressing addr i.arg 1}\n`
             end
         end
     | Lop(Istore(chunk, addr)) ->
@@ -500,25 +453,24 @@ let emit_instr i =
         (* We have i.arg.(0) = i.res.(0) *)
         `	{emit_string(instr_for_intop op)}	${emit_int n}, {emit_reg i.res.(0)}\n`
     | Lop(Inegf | Iabsf as floatop) ->
-        if i.arg.(0).loc <> Reg 100 then begin
+        if i.arg.(0) <> tos then
           `	fldl	{emit_reg i.arg.(0)}\n`;
-          push_fp()
-        end;
         `	{emit_string(instr_for_floatop floatop)}\n`
     | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
           as floatop) ->
-        begin match (i.arg.(0).loc, i.arg.(1).loc) with
-          (Reg 100, Reg 100) ->         (* both operands on top of FP stack *)
-            `	{emit_string(instr_for_floatop_pop floatop)}	%st(0), %st(1)\n`;
-            pop_fp()
-        | (Reg 100, _) ->               (* first operand on stack *)
-            `	{emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n`
-        | (_, Reg 100) ->               (* second operand on stack *)
-            `	{emit_string(instr_for_floatop_reversed floatop)}{emit_float_operand i.arg.(0)}\n`
-        | (_, _) ->                     (* both in regs or on stack *)
-            `	fldl	{emit_reg i.arg.(0)}\n`;
-            push_fp();
-            `	{emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n`
+        if i.arg.(0) = tos && i.arg.(1) = tos then
+          (* both operands on top of FP stack *)
+          `	{emit_string(instr_for_floatop_pop floatop)}	%st(0), %st(1)\n`
+        else if i.arg.(0) = tos then
+          (* first operand on stack *)
+          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
+        else if i.arg.(1) = tos then
+          (* second operand on stack *)
+          `	{emit_string(instr_for_floatop_reversed floatop)}	{emit_reg i.arg.(0)}\n`
+        else begin
+          (* both operands in memory *)
+          `	fldl	{emit_reg i.arg.(0)}\n`;
+          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
         end
     | Lop(Ifloatofint) ->
         begin match i.arg.(0).loc with
@@ -528,13 +480,10 @@ let emit_instr i =
             `	pushl	{emit_reg i.arg.(0)}\n`;
             `	fildl	(%esp)\n`;
             `	addl	$4, %esp\n`
-        end;
-        push_fp()
+        end
     | Lop(Iintoffloat) ->
-        if i.arg.(0).loc <> Reg 100 then begin
+        if i.arg.(0) <> tos then
           `	fldl	{emit_reg i.arg.(0)}\n`;
-          push_fp()
-        end;
         stack_offset := !stack_offset - 8;
         `	subl	$8, %esp\n`;
         `	fnstcw	4(%esp)\n`;
@@ -549,7 +498,6 @@ let emit_instr i =
             `	fistpl	(%esp)\n`;
             `	movl	(%esp), {emit_reg i.res.(0)}\n`
         end;
-        pop_fp();
         `	fldcw	4(%esp)\n`;
         `	addl	$8, %esp\n`;
         stack_offset := !stack_offset + 8
@@ -566,22 +514,14 @@ let emit_instr i =
         for n = Array.length i.arg - 1 downto 0 do
           let r = i.arg.(n) in
           match r with
-            {loc = Reg rn; typ = Float} ->
+            {loc = Reg _; typ = Float} ->
               `	subl	$8, %esp\n`;
-              stack_offset := !stack_offset + 8;
-              begin match rn with
-                100 ->
-                  `	fstpl	0(%esp)\n`;
-                  pop_fp()
-              | 101 when !fp_offset = 0 ->
-                  `	fstl	0(%esp)\n`
-              | _ ->
-                  `	fldl	{emit_reg r}\n`;
-                  `	fstpl	0(%esp)\n`
-              end
+              `	fstpl	0(%esp)\n`;
+              stack_offset := !stack_offset + 8
           | {loc = Stack sl; typ = Float} ->
-              `	pushl	4+{emit_reg r}\n`;
-              `	pushl	4+{emit_reg r}\n`;
+              let ofs = slot_offset sl 1 in
+              `	pushl	{emit_int(ofs + 4)}(%esp)\n`;
+              `	pushl	{emit_int(ofs + 4)}(%esp)\n`;
               stack_offset := !stack_offset + 8
           | _ ->
               `	pushl	{emit_reg r}\n`;
@@ -601,10 +541,8 @@ let emit_instr i =
         `	pushl	{emit_addressing addr i.arg 0}\n`;
         stack_offset := !stack_offset + 4
     | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
-        if i.arg.(0).loc <> Reg 100 then begin
+        if i.arg.(0) <> tos then
           `	fldl	{emit_reg i.arg.(0)}\n`;
-          push_fp()
-        end;
         `	{emit_string(instr_for_floatarithmem op)}	{emit_addressing addr i.arg 1}\n`
     | Lreloadretaddr ->
         ()
@@ -636,54 +574,45 @@ let emit_instr i =
             `	cmpl	${emit_int n}, {emit_reg i.arg.(0)}\n`;
             let b = name_for_cond_branch cmp in
             `	j{emit_string b}	{emit_label lbl}\n`
+        | Ifloattest((Ceq | Cne as cmp), neg) ->
+            if i.arg.(1) <> tos then
+              `	fldl	{emit_reg i.arg.(1)}\n`;
+            if i.arg.(0) <> tos then
+              `	fldl	{emit_reg i.arg.(0)}\n`;
+            `	fucompp\n`;
+            `	fnstsw	%ax\n`;
+            let neg1 = if cmp = Ceq then neg else not neg in
+            if neg1 then begin          (* branch if different *)
+              `	andb	$68, %ah\n`;
+              `	xorb	$64, %ah\n`;
+              `	jne	{emit_label lbl}\n`
+            end else begin              (* branch if equal *)
+              `	andb	$69, %ah\n`;
+              `	cmpb	$64, %ah\n`;
+              `	je	{emit_label lbl}\n`
+            end
         | Ifloattest(cmp, neg) ->
-            let instr =
-              match cmp with
-                Ceq | Cne -> "fucom"
-              | _         -> "fcom" in
             let actual_cmp =
-              match (i.arg.(0).loc, i.arg.(1).loc) with
-                (Reg 100, Reg 100) ->     (* both args on top of FP stack *)
-                  `	{emit_string instr}pp\n`;
-                  fp_offset := !fp_offset - 2;
-                  cmp
-              | (Reg 100, _) ->           (* first arg on top of FP stack *)
-                  `	{emit_string instr}p{emit_float_operand i.arg.(1)}\n`;
-                  pop_fp();
-                  cmp
-              | (_, Reg 100) ->           (* second arg on top of FP stack *)
-                  `	{emit_string instr}p{emit_float_operand i.arg.(0)}\n`;
-                  pop_fp();
-                  Cmm.swap_comparison cmp
-              | (_, _) ->
-                  `	fldl	{emit_reg i.arg.(0)}\n`;
-                  push_fp();
-                  `	{emit_string instr}p{emit_float_operand i.arg.(1)}\n`;
-                  pop_fp();
-                  cmp in
+              if i.arg.(0) = tos && i.arg.(1) = tos then begin
+                (* both args on top of FP stack *)
+                `	fcompp\n`;
+                cmp
+              end else if i.arg.(0) = tos then begin
+                (* first arg on top of FP stack *)
+                `	fcompl	{emit_reg i.arg.(1)}\n`;
+                cmp
+              end else if i.arg.(1) = tos then begin
+                (* second arg on top of FP stack *)
+                `	fcompl	{emit_reg i.arg.(0)}\n`;
+                Cmm.swap_comparison cmp
+              end else begin
+                `	fldl	{emit_reg i.arg.(0)}\n`;
+                `	fcompl	{emit_reg i.arg.(1)}\n`;
+                cmp
+              end in
             `	fnstsw	%ax\n`;
             begin match actual_cmp with
-              Ceq ->
-                if neg then begin
-                  `	andb	$68, %ah\n`;
-                  `	xorb	$64, %ah\n`;
-                  `	jne	`
-                end else begin
-                  `	andb	$69, %ah\n`;
-                  `	cmpb	$64, %ah\n`;
-                  `	je	`
-                end
-            | Cne ->
-                if neg then begin
-                  `	andb	$69, %ah\n`;
-                  `	cmpb	$64, %ah\n`;
-                  `	je	`
-                end else begin
-                  `	andb	$68, %ah\n`;
-                  `	xorb	$64, %ah\n`;
-                  `	jne	`
-                end
-            | Cle ->
+              Cle ->
                 `	andb	$69, %ah\n`;
                 `	decb	%ah\n`;
                 `	cmpb	$64, %ah\n`;
@@ -706,6 +635,7 @@ let emit_instr i =
                 if neg
                 then `	jne	`
                 else `	je	`
+            | _ -> fatal_error "Emit_i386: floattest"
             end;
             `{emit_label lbl}\n`
         | Ioddtest ->
diff --git a/asmcomp/emit_i386nt.mlp b/asmcomp/emit_i386nt.mlp
index 0592fc9a1..e709f4bf6 100644
--- a/asmcomp/emit_i386nt.mlp
+++ b/asmcomp/emit_i386nt.mlp
@@ -45,7 +45,8 @@ let slot_offset loc cl =
       else !stack_offset + num_stack_slots.(0) * 4 + n * 8
   | Outgoing n -> n
 
-(* Record symbols used and defined - at the end generate extern for those used but not defined *)
+(* Record symbols used and defined - at the end generate extern for those 
+   used but not defined *)
 
 let symbols_defined = ref StringSet.empty
 let symbols_used = ref StringSet.empty
@@ -68,23 +69,10 @@ let emit_label lbl =
 
 let emit_align n = `	ALIGN	{emit_int n}\n`
   
-(* Track the position of the floating-point stack *)
-
-let fp_offset = ref 0
-
-let push_fp () =
-  incr fp_offset;
-  if !fp_offset > 4 then fatal_error "Emit: float expression too complex"
-
-let pop_fp () =
-  decr fp_offset
-
 (* Output a pseudo-register *)
 
 let emit_reg = function
-    { loc = Reg r; typ = Float } ->
-      emit_string (register_name(r + !fp_offset))
-  | { loc = Reg r } ->
+    { loc = Reg r } ->
       emit_string (register_name r)
   | { loc = Stack s; typ = Float } as r ->
       let ofs = slot_offset s (register_class r) in
@@ -122,17 +110,6 @@ let register_overlap reg arr =
   with Exit ->
     true
 
-(* Check if a set of registers contains a float *)
-
-let contains_floats arr =
-  try
-    for i = 0 to Array.length arr - 1 do
-      if arr.(i).typ = Float then raise Exit
-    done;
-    false
-  with Exit ->
-    true
-
 (* Output an addressing mode *)
 
 let emit_signed_int d =
@@ -153,13 +130,6 @@ let emit_addressing addr r n =
   | Iindexed2scaled(scale, d) ->
       `[{emit_reg r.(n)}+{emit_reg r.(n+1)}*{emit_int scale}{emit_signed_int d}]`
 
-(* Emit the operand of a floating-point operation *)
-
-let emit_float_operand r =
-  match r.loc with
-    Reg _   -> `st, {emit_reg r}`
-  | _       -> `{emit_reg r}`
-
 (* Record live pointers at call points *)
 
 type frame_descr =
@@ -286,27 +256,23 @@ let range_check_trap = ref 0
 
 let float_constants = ref ([] : (int * string) list)
 
+let tos = phys_reg 100
+
 let emit_instr i =
     match i.desc with
       Lend -> ()
     | Lop(Imove | Ispill | Ireload) ->
-        if i.arg.(0).loc <> i.res.(0).loc then begin
-          match i.arg.(0).typ with
-            Int | Addr ->
-              `	mov	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
-          | Float ->
-              begin match i.arg.(0).loc with
-                Reg 100 ->              (* top of FP stack *)
-                  `	fstp	{emit_reg i.res.(0)}\n`;
-                  pop_fp()
-              | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
-                  `	fst	{emit_reg i.res.(0)}\n`
-              | _ ->
-                  `	fld	{emit_reg i.arg.(0)}\n`;
-                  push_fp();
-                  `	fstp	{emit_reg i.res.(0)}\n`;
-                  pop_fp()
-              end
+        let src = i.arg.(0) and dst = i.res.(0) in
+        if src.loc <> dst.loc then begin
+          if src.typ = Float then
+            if src = tos then
+              `	fstp	{emit_reg dst}\n`
+            else begin
+              `	fld	{emit_reg src}\n`;
+              `	fstp	{emit_reg dst}\n`
+            end
+          else
+            `	movl	{emit_reg dst}, {emit_reg src}\n`
         end
     | Lop(Iconst_int 0) ->
         begin match i.res.(0).loc with
@@ -325,8 +291,7 @@ let emit_instr i =
           let lbl = new_label() in
           float_constants := (lbl, s) :: !float_constants;
           `	fld	{emit_label lbl}\n`
-        end;
-        push_fp()
+        end
     | Lop(Iconst_symbol s) ->
         add_used_symbol s;
         `	mov	{emit_reg i.res.(0)}, OFFSET {emit_symbol s}\n`
@@ -355,12 +320,6 @@ let emit_instr i =
           `	call	_caml_c_call\n`;
           record_frame i.live
         end else begin
-          if contains_floats i.arg or contains_floats i.res then begin
-            `	ffree	st(0)\n`;
-            `	ffree	st(1)\n`;
-            `	ffree	st(2)\n`;
-            `	ffree	st(3)\n`
-          end;
           `	call	{emit_symbol s}\n`
         end
     | Lop(Istackoffset n) ->
@@ -391,21 +350,16 @@ let emit_instr i =
                 `	movsx	{emit_reg dest}, WORD PTR {emit_addressing addr i.arg 0}\n`
             end
         | Float ->
-            `	fld	REAL8 PTR {emit_addressing addr i.arg 0}\n`;
-            push_fp()
+            `	fld	REAL8 PTR {emit_addressing addr i.arg 0}\n`
         end
     | Lop(Istore(Word, addr)) ->
         begin match i.arg.(0).typ with
           Int | Addr ->
             `	mov	DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
         | Float ->
-            begin match i.arg.(0).loc with
-              Reg 100 ->                (* top of FP stack *)
-                `	fstp	REAL8 PTR {emit_addressing addr i.arg 1}\n`;
-                pop_fp()
-            | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
-                `	fst	REAL8 PTR {emit_addressing addr i.arg 1}\n`
-            | _ ->
+            if i.arg.(0) = tos then
+                `	fstp	REAL8 PTR {emit_addressing addr i.arg 1}\n`
+            else begin
                 `	fld	{emit_reg i.arg.(0)}\n`;
                 `	fstp	REAL8 PTR {emit_addressing addr i.arg 1}\n`
             end
@@ -495,25 +449,24 @@ let emit_instr i =
         (* We have i.arg.(0) = i.res.(0) *)
         `	{emit_string(instr_for_intop op)}	{emit_reg i.res.(0)}, {emit_int n}\n`
     | Lop(Inegf | Iabsf as floatop) ->
-        if i.arg.(0).loc <> Reg 100 then begin
+        if i.arg.(0) <> tos then begin
           `	fld	{emit_reg i.arg.(0)}\n`;
-          push_fp()
-        end;
         `	{emit_string(instr_for_floatop floatop)}\n`
     | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
           as floatop) ->
-        begin match (i.arg.(0).loc, i.arg.(1).loc) with
-          (Reg 100, Reg 100) ->         (* both operands on top of FP stack *)
-            `	{emit_string(instr_for_floatop_reversed floatop)}\n`;
-            pop_fp()
-        | (Reg 100, _) ->               (* first operand on stack *)
-            `	{emit_string(instr_for_floatop floatop)}	{emit_float_operand i.arg.(1)}\n`
-        | (_, Reg 100) ->               (* second operand on stack *)
-            `	{emit_string(instr_for_floatop_reversed floatop)}	{emit_float_operand i.arg.(0)}\n`
-        | (_, _) ->                     (* both in regs or on stack *)
-            `	fld	{emit_reg i.arg.(0)}\n`;
-            push_fp();
-            `	{emit_string(instr_for_floatop floatop)}	{emit_float_operand i.arg.(1)}\n`
+        if i.arg.(0) = tos && i.arg.(1) = tos then
+          (* both operands on top of FP stack *)
+          `	{emit_string(instr_for_floatop_reversed floatop)}\n`
+        else if i.arg.(0) = tos then
+          (* first operand on stack *)
+          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
+        else if i.arg.(1) = tos then
+          (* second operand on stack *)
+          `	{emit_string(instr_for_floatop_reversed floatop)}	{emit_reg i.arg.(0)}\n`
+        else begin
+          (* both operands in memory *)
+          `	fld	{emit_reg i.arg.(0)}\n`;
+          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
         end
     | Lop(Ifloatofint) ->
         begin match i.arg.(0).loc with
@@ -523,13 +476,10 @@ let emit_instr i =
             `	push	{emit_reg i.arg.(0)}\n`;
             `	fild	DWORD PTR [esp]\n`;
             `	add	esp, 4\n`
-        end;
-        push_fp()
+        end
     | Lop(Iintoffloat) ->
-        if i.arg.(0).loc <> Reg 100 then begin
+        if i.arg.(0) <> tos then
           `	fld	{emit_reg i.arg.(0)}\n`;
-          push_fp()
-        end;
         stack_offset := !stack_offset - 8;
         `	sub	esp, 8\n`;
         `	fnstcw	[esp+4]\n`;
@@ -544,7 +494,6 @@ let emit_instr i =
             `	fistp	DWORD PTR [esp]\n`;
             `	mov	{emit_reg i.res.(0)}, [esp]\n`
         end;
-        pop_fp();
         `	fldcw	[esp+4]\n`;
         `	add	esp, 8\n`;
         stack_offset := !stack_offset + 8
@@ -564,17 +513,8 @@ let emit_instr i =
           match r with
             {loc = Reg rn; typ = Float} ->
               `	sub	esp, 8\n`;
-              stack_offset := !stack_offset + 8;
-              begin match rn with
-                100 ->
-                  `	fstp	REAL8 PTR 0[esp]\n`;
-                  pop_fp()
-              | 101 when !fp_offset = 0 ->
-                  `	fst	REAL8 PTR 0[esp]\n`
-              | _ ->
-                  `	fld	{emit_reg r}\n`;
-                  `	fstp	REAL8 PTR 0[esp]\n`
-              end
+              `	fstp	REAL8 PTR 0[esp]\n`;
+              stack_offset := !stack_offset + 8
           | {loc = Stack sl; typ = Float} ->
               let ofs = slot_offset sl 1 in
               `	push	DWORD PTR {emit_int (ofs + 4)}[esp]\n`;
@@ -598,10 +538,8 @@ let emit_instr i =
         `	push	DWORD PTR {emit_addressing addr i.arg 0}\n`;
         stack_offset := !stack_offset + 4
     | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
-        if i.arg.(0).loc <> Reg 100 then begin
+        if i.arg.(0) <> tos then
           `	fld	{emit_reg i.arg.(0)}\n`;
-          push_fp()
-        end;
         `	{emit_string(instr_for_floatarithmem op)}	REAL8 PTR {emit_addressing addr i.arg 1}\n`
     | Lreloadretaddr ->
         ()
@@ -633,54 +571,45 @@ let emit_instr i =
             `	cmp	{emit_reg i.arg.(0)}, {emit_int n}\n`;
             let b = name_for_cond_branch cmp in
             `	j{emit_string b}	{emit_label lbl}\n`
+        | Ifloattest((Ceq | Cne as cmp), neg) ->
+            if i.arg.(1) <> tos then
+              `	fld	{emit_reg i.arg.(1)}\n`;
+            if i.arg.(0) <> tos then
+              `	fld	{emit_reg i.arg.(0)}\n`;
+            `	fucompp\n`;
+            `	fnstsw	ax\n`;
+            let neg1 = if cmp = Ceq then neg else not neg in
+            if neg1 then begin          (* branch if different *)
+              `	and	ah, 68\n`;
+              `	xor	ah, 64\n`;
+              `	jne	{emit_label lbl}\n`
+            end else begin              (* branch if equal *)
+              `	and	ah, 69\n`;
+              `	cmp	ah, 64\n`;
+              `	je	{emit_label lbl}\n`
+            end
         | Ifloattest(cmp, neg) ->
-            let instr =
-              match cmp with
-                Ceq | Cne -> "fucom"
-              | _         -> "fcom" in
             let actual_cmp =
-              match (i.arg.(0).loc, i.arg.(1).loc) with
-                (Reg 100, Reg 100) ->     (* both args on top of FP stack *)
-                  `	{emit_string instr}pp\n`;
-                  fp_offset := !fp_offset - 2;
-                  cmp
-              | (Reg 100, _) ->           (* first arg on top of FP stack *)
-                  `	{emit_string instr}p	{emit_reg i.arg.(1)}\n`;
-                  pop_fp();
-                  cmp
-              | (_, Reg 100) ->           (* second arg on top of FP stack *)
-                  `	{emit_string instr}p	{emit_reg i.arg.(0)}\n`;
-                  pop_fp();
-                  Cmm.swap_comparison cmp
-              | (_, _) ->
-                  `	fld	{emit_reg i.arg.(0)}\n`;
-                  push_fp();
-                  `	{emit_string instr}p	{emit_reg i.arg.(1)}\n`;
-                  pop_fp();
-                  cmp in
+              if i.arg.(0) = tos && i.arg.(1) = tos then begin
+                (* both args on top of FP stack *)
+                `	fcompp\n`;
+                cmp
+              end else if i.arg.(0) = tos then begin
+                (* first arg on top of FP stack *)
+                `	fcomp	{emit_reg i.arg.(1)}\n`;
+                cmp
+              end else if i.arg.(1) = tos then begin
+                (* second arg on top of FP stack *)
+                `	fcomp	{emit_reg i.arg.(0)}\n`;
+                Cmm.swap_comparison cmp
+              end else begin
+                `	fld	{emit_reg i.arg.(0)}\n`;
+                `	fcomp	{emit_reg i.arg.(1)}\n`;
+                cmp
+              end in
             `	fnstsw	ax\n`;
             begin match actual_cmp with
-              Ceq ->
-                if neg then begin
-                `	and	ah, 68\n`;
-                `	xor	ah, 64\n`;
-                `	jne	`
-                end else begin
-                `	and	ah, 69\n`;
-                `	cmp	ah, 64\n`;
-                `	je	`
-                end
-            | Cne ->
-                if neg then begin
-                `	and	ah, 69\n`;
-                `	cmp	ah, 64\n`;
-                `	je	`
-                end else begin
-                `	and	ah, 68\n`;
-                `	xor	ah, 64\n`;
-                `	jne	`
-                end
-            | Cle ->
+              Cle ->
                 `	and	ah, 69\n`;
                 `	dec	ah\n`;
                 `	cmp	ah, 64\n`;
@@ -703,6 +632,7 @@ let emit_instr i =
                 if neg
                 then `	jne	`
                 else `	je	`
+            | _ -> fatal_error "Emit_i386: floattest"
             end;
             `{emit_label lbl}\n`
         | Ioddtest ->
diff --git a/asmcomp/proc_i386.ml b/asmcomp/proc_i386.ml
index ba280f9f4..b29a657a1 100644
--- a/asmcomp/proc_i386.ml
+++ b/asmcomp/proc_i386.ml
@@ -31,19 +31,13 @@ open Mach
     edi         5
     ebp         6
 
-    f0 - f3     101-104         function arguments and results
-                                f0: C function results
-                                not preserved by C
-
-   The other 4 floating-point registers are treated as a stack.
-   We use the pseudo-register %tos (100) to represent the top of that stack. *)
+    tos         100             top of floating-point stack. *)
 
 let int_reg_name =
   [| "%eax"; "%ebx"; "%ecx"; "%edx"; "%esi"; "%edi"; "%ebp" |]
 
 let float_reg_name =
-  [| "%tos"; "%st(0)"; "%st(1)"; "%st(2)"; "%st(3)";
-     "%st(4)"; "%st(5)"; "%st(6)"; "%st(7)" |]
+  [| "%tos" |]
 
 let num_register_classes = 2
 
@@ -53,15 +47,15 @@ let register_class r =
   | Addr -> 0
   | Float -> 1
 
-let num_available_registers = [| 7; 4 |]
+let num_available_registers = [| 7; 0 |]
 
-let first_available_register = [| 0; 101 |]
+let first_available_register = [| 0; 100 |]
 
 let register_name r =
   if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
 
-(* There is little scheduling, and some operations are more efficient when
-   %eax or %st(0) are arguments *)
+(* There is little scheduling, and some operations are more compact
+   when their argument is %eax. *)
 
 let rotate_registers = false
 
@@ -72,10 +66,7 @@ let hard_int_reg =
   for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done;
   v
 
-let hard_float_reg =
-  let v = Array.create 5 Reg.dummy in
-  for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done;
-  v
+let hard_float_reg = [| Reg.at_location Float (Reg 100) |]
 
 let all_phys_regs =
   Array.append hard_int_reg hard_float_reg
@@ -346,23 +337,23 @@ let outgoing ofs = Outgoing ofs
 let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
 
 let loc_arguments arg =
-  calling_conventions 0 5 101 104 outgoing arg
+  calling_conventions 0 5 100 99 outgoing arg
 let loc_parameters arg =
-  let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc
+  let (loc, ofs) = calling_conventions 0 5 100 99 incoming arg in loc
 let loc_results res =
-  let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc
+  let (loc, ofs) = calling_conventions 0 5 100 100 not_supported res in loc
 let extcall_use_push = true
 let loc_external_arguments arg =
   fatal_error "Proc.loc_external_arguments"
 let loc_external_results res =
-  let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc
+  let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
 
 let loc_exn_bucket = eax
 
 (* Registers destroyed by operations *)
 
 let destroyed_at_c_call =               (* ebx, esi, edi, ebp preserved *)
-  Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104])
+  Array.of_list(List.map phys_reg [0;2;3])
 
 let destroyed_at_oper = function
     Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
@@ -382,13 +373,13 @@ let destroyed_at_raise = all_phys_regs
 let safe_register_pressure op = 4
 
 let max_register_pressure = function
-    Iextcall(_, _) -> [| 4; 0 |]
-  | Iintop(Idiv | Imod) -> [| 5; 4 |]
+    Iextcall(_, _) -> [| 4; max_int |]
+  | Iintop(Idiv | Imod) -> [| 5; max_int |]
   | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) |
-    Iintoffloat -> [| 6; 4 |]
-  | _ -> [|7; 4|]
+    Iintoffloat -> [| 6; max_int |]
+  | _ -> [|7; max_int |]
 
-(* Reloading of instruction arguments, storing of instruction results *)
+(* Reloading of instruction arguments, storing of instruction results. *)
 
 let stackp r =
   match r.loc with
@@ -403,6 +394,10 @@ let reload_test makereg tst arg =
       else arg
   | _ -> arg
 
+(* Since #floatregs = 0, pseudoregs of type float will never be reloaded.
+   Hence there is no need to make special cases for
+   floating-point operations. *)
+
 let reload_operation makereg op arg res =
   match op with
     Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
@@ -411,21 +406,16 @@ let reload_operation makereg op arg res =
       then ([|arg.(0); makereg arg.(1)|], res)
       else (arg, res)
   | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat |
-    Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ispecific(Ipush) ->
+    Ispecific(Ipush) ->
       (* The argument(s) can be either in register or on stack *)
       (arg, res)
-  | Ispecific(Ifloatarithmem(_, _)) ->
-      (* First arg can be either in register or on stack, but remaining
-         arguments must be in registers *)
-      let newarg = Array.create (Array.length arg) arg.(0) in
-      for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done;
-      (newarg, res)
   | _ -> (* Other operations: all args and results in registers *)
       raise Use_default
 
 (* Scheduling is turned off because our model does not fit the 486
    nor Pentium very well. In particular, it messes up with the
-   float reg stack. *)
+   float reg stack. The Pentium Pro schedules at run-time much better 
+   than what we could do. *)
 
 let need_scheduling = false
 
diff --git a/asmcomp/proc_i386nt.ml b/asmcomp/proc_i386nt.ml
index 6037fe28d..0a787c9d5 100644
--- a/asmcomp/proc_i386nt.ml
+++ b/asmcomp/proc_i386nt.ml
@@ -31,19 +31,13 @@ open Mach
     edi         5
     ebp         6
 
-    f0 - f3     101-104         function arguments and results
-                                f0: C function results
-                                not preserved by C
-
-   The other 4 floating-point registers are treated as a stack.
-   We use the pseudo-register %tos (100) to represent the top of that stack. *)
+    tos         100             top of floating-point stack. *)
 
 let int_reg_name =
   [| "eax"; "ebx"; "ecx"; "edx"; "esi"; "edi"; "ebp" |]
 
 let float_reg_name =
-  [| "tos"; "st(0)"; "st(1)"; "st(2)"; "st(3)";
-     "st(4)"; "st(5)"; "st(6)"; "st(7)" |]
+  [| "tos" |]
 
 let num_register_classes = 2
 
@@ -53,15 +47,15 @@ let register_class r =
   | Addr -> 0
   | Float -> 1
 
-let num_available_registers = [| 7; 4 |]
+let num_available_registers = [| 7; 0 |]
 
-let first_available_register = [| 0; 101 |]
+let first_available_register = [| 0; 100 |]
 
 let register_name r =
   if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
 
-(* There is little scheduling, and some operations are more efficient when
-   eax or st(0) are arguments *)
+(* There is little scheduling, and some operations are more compact
+   when their argument is %eax. *)
 
 let rotate_registers = false
 
@@ -72,10 +66,7 @@ let hard_int_reg =
   for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done;
   v
 
-let hard_float_reg =
-  let v = Array.create 5 Reg.dummy in
-  for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done;
-  v
+let hard_float_reg = [| Reg.at_location Float (Reg 100) |]
 
 let all_phys_regs =
   Array.append hard_int_reg hard_float_reg
@@ -346,23 +337,23 @@ let outgoing ofs = Outgoing ofs
 let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
 
 let loc_arguments arg =
-  calling_conventions 0 5 101 104 outgoing arg
+  calling_conventions 0 5 100 99 outgoing arg
 let loc_parameters arg =
-  let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc
+  let (loc, ofs) = calling_conventions 0 5 100 99 incoming arg in loc
 let loc_results res =
-  let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc
+  let (loc, ofs) = calling_conventions 0 5 100 100 not_supported res in loc
 let extcall_use_push = true
 let loc_external_arguments arg =
   fatal_error "Proc.loc_external_arguments"
 let loc_external_results res =
-  let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc
+  let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
 
 let loc_exn_bucket = eax
 
 (* Registers destroyed by operations *)
 
 let destroyed_at_c_call =               (* ebx, esi, edi, ebp preserved *)
-  Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104])
+  Array.of_list(List.map phys_reg [0;2;3])
 
 let destroyed_at_oper = function
     Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
@@ -382,11 +373,11 @@ let destroyed_at_raise = all_phys_regs
 let safe_register_pressure op = 4
 
 let max_register_pressure = function
-    Iextcall(_, _) -> [| 4; 0 |]
-  | Iintop(Idiv | Imod) -> [| 5; 4 |]
+    Iextcall(_, _) -> [| 4; max_int |]
+  | Iintop(Idiv | Imod) -> [| 5; max_int |]
   | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) |
-    Iintoffloat -> [| 6; 4 |]
-  | _ -> [|7; 4|]
+    Iintoffloat -> [| 6; max_int |]
+  | _ -> [|7; max_int |]
 
 (* Reloading of instruction arguments, storing of instruction results *)
 
@@ -403,6 +394,10 @@ let reload_test makereg tst arg =
       else arg
   | _ -> arg
 
+(* Since #floatregs = 0, pseudoregs of type float will never be reloaded.
+   Hence there is no need to make special cases for
+   floating-point operations. *)
+
 let reload_operation makereg op arg res =
   match op with
     Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
@@ -411,15 +406,9 @@ let reload_operation makereg op arg res =
       then ([|arg.(0); makereg arg.(1)|], res)
       else (arg, res)
   | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat |
-    Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ispecific(Ipush) ->
+    Ispecific(Ipush) ->
       (* The argument(s) can be either in register or on stack *)
       (arg, res)
-  | Ispecific(Ifloatarithmem(_, _)) ->
-      (* First arg can be either in register or on stack, but remaining
-         arguments must be in registers *)
-      let newarg = Array.create (Array.length arg) arg.(0) in
-      for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done;
-      (newarg, res)
   | _ -> (* Other operations: all args and results in registers *)
       raise Use_default
 
diff --git a/asmcomp/reload.ml b/asmcomp/reload.ml
index 7c559b2dc..c57631d1d 100644
--- a/asmcomp/reload.ml
+++ b/asmcomp/reload.ml
@@ -33,7 +33,10 @@ let makereg r =
   match r.loc with
     Unknown -> fatal_error "Reload.makereg"
   | Reg _ -> r
-  | Stack _ -> redo_regalloc := true; Reg.clone r
+  | Stack _ ->
+      if Proc.num_available_registers.(Proc.register_class r) = 0
+      then r
+      else begin redo_regalloc := true; Reg.clone r end
 
 let makeregs rv =
   let n = Array.length rv in
diff --git a/asmrun/i386.S b/asmrun/i386.S
index 4902addde..7980afbce 100644
--- a/asmrun/i386.S
+++ b/asmrun/i386.S
@@ -138,16 +138,10 @@ L103:   subl	G(young_ptr), %eax      /* eax = - size */
         .align  FUNCTION_ALIGN
 G(caml_c_call):
     /* Record lowest stack address and return address */
-    /* In parallel, free the floating point registers */
-    /* (Pairing is expected on the Pentium.) */
         movl    (%esp), %edx
-        ffree   %st(0)
         movl    %edx, G(caml_last_return_address)
-        ffree   %st(1)
         leal    4(%esp), %edx
-        ffree   %st(2)
         movl    %edx, G(caml_bottom_of_stack)
-        ffree   %st(3)
     /* Call the function (address in %eax) */
         jmp     *%eax
 
@@ -223,17 +217,11 @@ L107:
        used by caml_c_call */
         popl    G(caml_bottom_of_stack)
         popl    G(caml_last_return_address)
-    /* Restore callee-save registers.
-       In parallel, free the floating-point registers
-       that may have been used by Caml. */
+    /* Restore callee-save registers. */
         popl    %ebp
-        ffree   %st(0)
         popl    %edi
-        ffree   %st(1)
         popl    %esi
-        ffree   %st(2)
         popl    %ebx
-        ffree   %st(3)
     /* Return to caller. */
         ret
 L108:
diff --git a/asmrun/i386nt.asm b/asmrun/i386nt.asm
index fbd3277e4..4eaa7ba15 100644
--- a/asmrun/i386nt.asm
+++ b/asmrun/i386nt.asm
@@ -48,6 +48,32 @@ _caml_exception_pointer		DWORD 0
         PUBLIC  _caml_alloc
 	PUBLIC  _caml_call_gc
 
+_caml_call_gc:
+    ; Record lowest stack address and return address 
+        mov	eax, [esp]
+        mov     _caml_last_return_address, eax
+        lea     eax, [esp+4]
+        mov     _caml_bottom_of_stack, eax
+    ; Save all regs used by the code generator 
+L105:   mov	_gc_entry_regs + 4, ebx
+        mov	_gc_entry_regs + 8, ecx
+        mov	_gc_entry_regs + 12, edx
+        mov	_gc_entry_regs + 16, esi
+        mov	_gc_entry_regs + 20, edi
+        mov	_gc_entry_regs + 24, ebp
+    ; Call the garbage collector 
+        call	_garbage_collection
+    ; Restore all regs used by the code generator 
+        mov	ebx, _gc_entry_regs + 4
+        mov	ecx, _gc_entry_regs + 8
+        mov	edx, _gc_entry_regs + 12
+        mov	esi, _gc_entry_regs + 16
+        mov	edi, _gc_entry_regs + 20
+        mov	ebp, _gc_entry_regs + 24
+    ; Return to caller 
+        push	_caml_last_return_address
+        ret	
+
         ALIGN  4
 _caml_alloc1:
         mov	eax, _young_ptr
@@ -56,8 +82,12 @@ _caml_alloc1:
         cmp	eax, _young_limit
         jb	L100
         ret	
-L100:   mov	eax, 8
-        jmp	L105
+L100:   mov	eax, [esp]
+        mov     _caml_last_return_address, eax
+        lea     eax, [esp+4]
+        mov     _caml_bottom_of_stack, eax
+        call    L105
+        jmp     _caml_alloc1
 
         ALIGN  4
 _caml_alloc2:
@@ -67,8 +97,12 @@ _caml_alloc2:
         cmp	eax, _young_limit
         jb	L101
         ret	
-L101:   mov	eax, 12
-        jmp	L105
+L101:   mov	eax, [esp]
+        mov     _caml_last_return_address, eax
+        lea     eax, [esp+4]
+        mov     _caml_bottom_of_stack, eax
+        call    L105
+        jmp     _caml_alloc2
 
         ALIGN  4
 _caml_alloc3:
@@ -78,59 +112,31 @@ _caml_alloc3:
         cmp	eax, _young_limit
         jb	L102
         ret	
-L102:   mov	eax, 16
-        jmp	L105
+L102:   mov	eax, [esp]
+        mov     _caml_last_return_address, eax
+        lea     eax, [esp+4]
+        mov     _caml_bottom_of_stack, eax
+        call    L105
+        jmp     _caml_alloc3
 
         ALIGN  4
 _caml_alloc:
-        push	eax
-        mov	eax, _young_ptr
-        sub	eax, [esp]
-        mov	_young_ptr, eax
-        cmp	eax, _young_limit
-        jb	L103
-        add	esp, 4
-        ret	
-L103:   pop	eax
-        jmp	L105
-
-_caml_call_gc:
-    ; Adjust return address and recover desired size in eax 
-        pop	eax
-        add	eax, 2
-        push	eax
-        movzx	eax, WORD PTR [eax-2]
-L105:
-    ; Record lowest stack address and return address 
-        pop	_caml_last_return_address
-        mov	_caml_bottom_of_stack, esp
-    ; Save all regs used by the code generator 
-        mov	_gc_entry_regs + 4, ebx
-        mov	_gc_entry_regs + 8, ecx
-        mov	_gc_entry_regs + 12, edx
-        mov	_gc_entry_regs + 16, esi
-        mov	_gc_entry_regs + 20, edi
-        mov	_gc_entry_regs + 24, ebp
-    ; Save desired size 
-        push	eax
-    ; Call the garbage collector 
-        call	_garbage_collection
-    ; Restore all regs used by the code generator 
-        mov	ebx, _gc_entry_regs + 4
-        mov	ecx, _gc_entry_regs + 8
-        mov	edx, _gc_entry_regs + 12
-        mov	esi, _gc_entry_regs + 16
-        mov	edi, _gc_entry_regs + 20
-        mov	ebp, _gc_entry_regs + 24
-    ; Recover desired size 
-        pop	eax
-    ; Decrement young_ptr by desired size 
-        sub	_young_ptr, eax
-    ; Reload result of allocation in %eax 
-        mov	eax, _young_ptr
-    ; Return to caller 
-        push	_caml_last_return_address
-        ret	
+        sub     eax, _young_ptr         ; eax = size - young_ptr
+        neg     eax                     ; eax = young_ptr - size
+        cmp     eax, _young_limit
+        jb      L103
+        mov     _young_ptr, eax
+        ret
+L103:   sub     eax, _young_ptr         ; eax = - size
+        neg     eax                     ; eax = size
+        push    eax                     ; save desired size
+        mov	eax, [esp+4]
+        mov     _caml_last_return_address, eax
+        lea     eax, [esp+8]
+        mov     _caml_bottom_of_stack, eax
+        call    L105
+        pop     eax                     ; recover desired size
+        jmp     _caml_alloc
 
 ; Call a C function from Caml 
 
@@ -138,16 +144,10 @@ L105:
         ALIGN  4
 _caml_c_call:
     ; Record lowest stack address and return address 
-    ; In parallel, free the floating point registers 
-    ; (Pairing is expected on the Pentium.) 
         mov	edx, [esp]
-        ffree	st(0)
         mov	_caml_last_return_address, edx
-        ffree	st(1)
         lea	edx, [esp+4]
-        ffree	st(2)
         mov	_caml_bottom_of_stack, edx
-        ffree	st(3)
     ; Call the function (address in %eax) 
         jmp	eax
 
@@ -224,16 +224,10 @@ L107:
         pop	_caml_bottom_of_stack
         pop	_caml_last_return_address
     ; Restore callee-save registers.
-    ; In parallel, free the floating-point registers
-    ; that may have been used by Caml.
         pop	ebp
-        ffree	st(0)
         pop	edi
-        ffree	st(1)
         pop	esi
-        ffree	st(2)
         pop	ebx
-        ffree	st(3)
     ; Return to caller. 
         ret	
 L108:
author	Xavier Leroy <xavier.leroy@inria.fr>	1996-05-16 19:39:26 +0000
committer	Xavier Leroy <xavier.leroy@inria.fr>	1996-05-16 19:39:26 +0000
commit	2a309d688f7d6e673563ab11c06fe4e294cc4bfa (patch)
tree	42de46da0f07425f1f041cad8755eb2401324767
parent	064465320e18f583d63fe6c008ab9b8c4c22b0ae (diff)