6 files changed, 303 insertions, 177 deletions
diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml
index e391df4a6..bcbfb5f3f 100644
--- a/asmcomp/i386/arch.ml
+++ b/asmcomp/i386/arch.ml
@@ -14,7 +14,11 @@
 
 (* Machine-specific command-line options *)
 
-let command_line_options = []
+let fast_math = ref false
+
+let command_line_options =
+  [ "-ffast-math", Arg.Set fast_math,
+      " Inline trigonometric and exponential functions" ]
 
 (* Specific operations for the Intel 386 processor *)
 
@@ -42,6 +46,8 @@ type specific_operation =
   | Ifloatarithmem of bool * float_operation * addressing_mode
                                         (* Float arith operation with memory *)
                                         (* bool: true=64 bits, false=32 *)
+  | Ifloatspecial of string
+
 and float_operation =
     Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev
 
@@ -132,3 +138,10 @@ let print_specific_operation printreg op ppf arg =
       let long = if double then "float64" else "float32" in
       fprintf ppf "%a %s %s[%a]" printreg arg.(0) (op_name op) long
        (print_addressing printreg addr) (Array.sub arg 1 (Array.length arg - 1))
+  | Ifloatspecial name ->
+      fprintf ppf "%s " name;
+      for i = 0 to Array.length arg - 1 do
+        if i > 0 then fprintf ppf ", ";
+        printreg ppf arg.(i)
+      done
+      
diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp
index e7e4e769c..821928e23 100644
--- a/asmcomp/i386/emit.mlp
+++ b/asmcomp/i386/emit.mlp
@@ -272,6 +272,99 @@ let output_epilogue () =
   let n = frame_size() - 4 in
   if n > 0 then `	addl	${emit_int n}, %esp\n`
 
+(* Determine if the given register is the top of the floating-point stack *)
+
+let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
+
+(* Emit a "p" suffix if TOS is dead after the given instruction *)
+
+let pop_suffix i = 
+  let r = i.arg.(0) in
+  if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
+
+(* Emit the code for a floating-point comparison *)
+
+let emit_float_test cmp neg arg lbl =
+  let actual_cmp =
+    match (is_tos arg.(0), is_tos arg.(1)) with
+      (true, true) ->
+      (* both args on top of FP stack *)
+      `	fcompp\n`;
+      cmp
+    | (true, false) ->
+      (* first arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    | (false, true) ->
+      (* second arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(0)}\n`;
+      Cmm.swap_comparison cmp
+    | (false, false) ->
+      `	fldl	{emit_reg arg.(0)}\n`;
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    in
+  `	fnstsw	%ax\n`;
+  begin match actual_cmp with
+    Ceq ->
+      if neg then begin
+      `	andb	$68, %ah\n`;
+      `	xorb	$64, %ah\n`;
+      `	jne	`
+      end else begin
+      `	andb	$69, %ah\n`;
+      `	cmpb	$64, %ah\n`;
+      `	je	`
+      end
+  | Cne ->
+      if neg then begin
+      `	andb	$69, %ah\n`;
+      `	cmpb	$64, %ah\n`;
+      `	je	`
+      end else begin
+      `	andb	$68, %ah\n`;
+      `	xorb	$64, %ah\n`;
+      `	jne	`
+      end
+  | Cle ->
+      `	andb	$69, %ah\n`;
+      `	decb	%ah\n`;
+      `	cmpb	$64, %ah\n`;
+      if neg
+      then `	jae	`
+      else `	jb	`
+  | Cge ->
+      `	andb	$5, %ah\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Clt ->
+      `	andb	$69, %ah\n`;
+      `	cmpb	$1, %ah\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Cgt ->
+      `	andb	$69, %ah\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  end;
+  `{emit_label lbl}\n`
+
+(* Emit a Ifloatspecial instruction *)
+
+let emit_floatspecial = function
+    "atan"  -> `	fldl1; fpatan\n`
+  | "atan2" -> `	fpatan\n`
+  | "cos"   -> `	fcos\n`
+  | "log"   -> `	fldln2; fxch; fyl2x\n`
+  | "log10" -> `	fldlg2; fxch; fyl2x\n`
+  | "sin"   -> `	fsin\n`
+  | "sqrt"  -> `	fsqrt\n`
+  | "tan"   -> `	fptan; fstp %st(0)\n`
+  | _ -> assert false
+
 (* Output the assembly code for an instruction *)
 
 (* Name of current function *)
@@ -280,12 +373,9 @@ let function_name = ref ""
 let tailrec_entry_point = ref 0
 (* Label of trap for out-of-range accesses *)
 let range_check_trap = ref 0
-
+(* Record float literals to be emitted later *)
 let float_constants = ref ([] : (int * string) list)
 
-let tos = phys_reg 100
-
-
 let emit_instr fallthrough i =
     match i.desc with
       Lend -> ()
@@ -293,8 +383,10 @@ let emit_instr fallthrough i =
         let src = i.arg.(0) and dst = i.res.(0) in
         if src.loc <> dst.loc then begin
           if src.typ = Float then
-            if src = tos then
-              `	fstpl	{emit_reg dst}\n`
+            if is_tos src then
+              `	fst{pop_suffix i}l	{emit_reg dst}\n`
+            else if is_tos dst then
+              `	fldl	{emit_reg src}\n`
             else begin
               `	fldl	{emit_reg src}\n`;
               `	fstpl	{emit_reg dst}\n`
@@ -354,6 +446,8 @@ let emit_instr fallthrough i =
     | Lop(Iload(chunk, addr)) ->
         let dest = i.res.(0) in
         begin match chunk with
+          | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
+              `	movl	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
           | Byte_unsigned ->
               `	movzbl	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
           | Byte_signed ->
@@ -366,8 +460,6 @@ let emit_instr fallthrough i =
             `	flds	{emit_addressing addr i.arg 0}\n`
           | Double | Double_u ->
             `	fldl	{emit_addressing addr i.arg 0}\n`
-          | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
-              `	movl	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
         end
     | Lop(Istore(chunk, addr)) ->
         begin match chunk with
@@ -378,15 +470,15 @@ let emit_instr fallthrough i =
           | Sixteen_unsigned | Sixteen_signed ->
             `	movw	{emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
           | Single ->
-              if i.arg.(0) = tos then
-                `	fstps	{emit_addressing addr i.arg 1}\n`
+              if is_tos i.arg.(0) then
+                `	fst{pop_suffix i}s	{emit_addressing addr i.arg 1}\n`
               else begin
                 `	fldl	{emit_reg i.arg.(0)}\n`;
                 `	fstps	{emit_addressing addr i.arg 1}\n`
               end
           | Double | Double_u ->
-              if i.arg.(0) = tos then
-                `	fstpl	{emit_addressing addr i.arg 1}\n`
+              if is_tos i.arg.(0) then
+                `	fst{pop_suffix i}l	{emit_addressing addr i.arg 1}\n`
               else begin
                 `	fldl	{emit_reg i.arg.(0)}\n`;
                 `	fstpl	{emit_addressing addr i.arg 1}\n`
@@ -444,12 +536,12 @@ let emit_instr fallthrough i =
     | Lop(Iintop op) ->
         (* We have i.arg.(0) = i.res.(0) *)
         `	{emit_string(instr_for_intop op)}	{emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
+    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
+        `	leal	{emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
     | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
         `	incl	{emit_reg i.res.(0)}\n`
     | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
         `	decl	{emit_reg i.res.(0)}\n`
-    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
-        `	leal	{emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
     | Lop(Iintop_imm(Idiv, n)) ->
         let l = Misc.log2 n in
         let lbl = new_label() in
@@ -470,21 +562,22 @@ let emit_instr fallthrough i =
         (* We have i.arg.(0) = i.res.(0) *)
         `	{emit_string(instr_for_intop op)}	${emit_int n}, {emit_reg i.res.(0)}\n`
     | Lop(Inegf | Iabsf as floatop) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
           `	fldl	{emit_reg i.arg.(0)}\n`;
         `	{emit_string(instr_for_floatop floatop)}\n`
     | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
           as floatop) ->
-        if i.arg.(0) = tos && i.arg.(1) = tos then
+        begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
+          (true, true) ->
           (* both operands on top of FP stack *)
           `	{emit_string(instr_for_floatop_pop floatop)}	%st, %st(1)\n`
-        else if i.arg.(0) = tos then
+        | (true, false) ->
           (* first operand on stack *)
           `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
-        else if i.arg.(1) = tos then
+        | (false, true) ->
           (* second operand on stack *)
           `	{emit_string(instr_for_floatop_reversed floatop)}	{emit_reg i.arg.(0)}\n`
-        else begin
+        | (false, false) ->
           (* both operands in memory *)
           `	fldl	{emit_reg i.arg.(0)}\n`;
           `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
@@ -499,7 +592,7 @@ let emit_instr fallthrough i =
             `	addl	$4, %esp\n`
         end
     | Lop(Iintoffloat) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
           `	fldl	{emit_reg i.arg.(0)}\n`;
         stack_offset := !stack_offset - 8;
         `	subl	$8, %esp\n`;
@@ -510,9 +603,9 @@ let emit_instr fallthrough i =
         `	fldcw	(%esp)\n`;
         begin match i.res.(0).loc with
           Stack s ->
-            `	fistpl	{emit_reg i.res.(0)}\n`
+            `	fist{pop_suffix i}l	{emit_reg i.res.(0)}\n`
         | _ ->
-            `	fistpl	(%esp)\n`;
+            `	fist{pop_suffix i}l	(%esp)\n`;
             `	movl	(%esp), {emit_reg i.res.(0)}\n`
         end;
         `	fldcw	4(%esp)\n`;
@@ -558,9 +651,18 @@ let emit_instr fallthrough i =
         `	pushl	{emit_addressing addr i.arg 0}\n`;
         stack_offset := !stack_offset + 8
     | Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
           `	fldl	{emit_reg i.arg.(0)}\n`;
         `	{emit_string(instr_for_floatarithmem double op)}	{emit_addressing addr i.arg 1}\n`
+    | Lop(Ispecific(Ifloatspecial s)) ->
+        (* Push args on float stack if necessary *)
+        for k = 0 to Array.length i.arg - 1 do
+          if not (is_tos i.arg.(k)) then `	fldl	{emit_reg i.arg.(k)}\n`
+        done;
+        (* Fix-up for binary instrs whose args were swapped *)
+        if Array.length i.arg = 2 && is_tos i.arg.(1) then
+          `	fxch	%st(1)\n`;
+        emit_floatspecial s
     | Lreloadretaddr ->
         ()
     | Lreturn ->
@@ -591,70 +693,8 @@ let emit_instr fallthrough i =
             `	cmpl	${emit_int n}, {emit_reg i.arg.(0)}\n`;
             let b = name_for_cond_branch cmp in
             `	j{emit_string b}	{emit_label lbl}\n`
-        | Ifloattest((Ceq | Cne as cmp), neg) ->
-            if i.arg.(1) <> tos then
-              `	fldl	{emit_reg i.arg.(1)}\n`;
-            if i.arg.(0) <> tos then
-              `	fldl	{emit_reg i.arg.(0)}\n`;
-            `	fucompp\n`;
-            `	fnstsw	%ax\n`;
-            let neg1 = if cmp = Ceq then neg else not neg in
-            if neg1 then begin          (* branch if different *)
-              `	andb	$68, %ah\n`;
-              `	xorb	$64, %ah\n`;
-              `	jne	{emit_label lbl}\n`
-            end else begin              (* branch if equal *)
-              `	andb	$69, %ah\n`;
-              `	cmpb	$64, %ah\n`;
-              `	je	{emit_label lbl}\n`
-            end
         | Ifloattest(cmp, neg) ->
-            let actual_cmp =
-              if i.arg.(0) = tos && i.arg.(1) = tos then begin
-                (* both args on top of FP stack *)
-                `	fcompp\n`;
-                cmp
-              end else if i.arg.(0) = tos then begin
-                (* first arg on top of FP stack *)
-                `	fcompl	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end else if i.arg.(1) = tos then begin
-                (* second arg on top of FP stack *)
-                `	fcompl	{emit_reg i.arg.(0)}\n`;
-                Cmm.swap_comparison cmp
-              end else begin
-                `	fldl	{emit_reg i.arg.(0)}\n`;
-                `	fcompl	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end in
-            `	fnstsw	%ax\n`;
-            begin match actual_cmp with
-              Cle ->
-                `	andb	$69, %ah\n`;
-                `	decb	%ah\n`;
-                `	cmpb	$64, %ah\n`;
-                if neg
-                then `	jae	`
-                else `	jb	`
-            | Cge ->
-                `	andb	$5, %ah\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Clt ->
-                `	andb	$69, %ah\n`;
-                `	cmpb	$1, %ah\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Cgt ->
-                `	andb	$69, %ah\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | _ -> fatal_error "Emit_i386: floattest"
-            end;
-            `{emit_label lbl}\n`
+            emit_float_test cmp neg i.arg lbl
         | Ioddtest ->
             `	testl	$1, {emit_reg i.arg.(0)}\n`;
             `	jne	{emit_label lbl}\n`
diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp
index b67b68336..57dd69a8e 100644
--- a/asmcomp/i386/emit_nt.mlp
+++ b/asmcomp/i386/emit_nt.mlp
@@ -246,6 +246,99 @@ let output_epilogue () =
   let n = frame_size() - 4 in
   if n > 0 then `	add    esp, {emit_int n}\n`
 
+(* Determine if the given register is the top of the floating-point stack *)
+
+let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
+
+(* Emit a "p" suffix if TOS is dead after the given instruction *)
+
+let pop_suffix i = 
+  let r = i.arg.(0) in
+  if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
+
+(* Emit the code for a floating-point comparison *)
+
+let emit_float_test cmp neg arg lbl =
+  let actual_cmp =
+    match (is_tos arg.(0), is_tos arg.(1)) with
+      (true, true) ->
+      (* both args on top of FP stack *)
+      `	fcompp\n`;
+      cmp
+    | (true, false) ->
+      (* first arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    | (false, true) ->
+      (* second arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(0)}\n`;
+      Cmm.swap_comparison cmp
+    | (false, false) ->
+      `	fldl	{emit_reg arg.(0)}\n`;
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    in
+  `	fnstsw	%ax\n`;
+  begin match actual_cmp with
+    Ceq ->
+      if neg then begin
+      `	and	ah, 68\n`;
+      `	xor	ah, 64\n`;
+      `	jne	`
+      end else begin
+      `	and	ah, 69\n`;
+      `	cmp	ah, 64\n`;
+      `	je	`
+      end
+  | Cne ->
+      if neg then begin
+      `	and	ah, 69\n`;
+      `	cmp	ah, 64\n`;
+      `	je	`
+      end else begin
+      `	and	ah, 68\n`;
+      `	xor	ah, 64\n`;
+      `	jne	`
+      end
+  | Cle ->
+      `	and	ah, 69\n`;
+      `	dec	ah\n`;
+      `	cmp	ah, 64\n`;
+      if neg
+      then `	jae	`
+      else `	jb	`
+  | Cge ->
+      `	and	ah, 5\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Clt ->
+      `	and	ah, 69\n`;
+      `	cmp	ah, 1\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Cgt ->
+      `	and	ah, 69\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  end;
+  `{emit_label lbl}\n`
+
+(* Emit a Ifloatspecial instruction *)
+
+let emit_floatspecial = function
+    "atan"  -> `	fldl1\n\tfpatan\n`
+  | "atan2" -> `	fpatan\n`
+  | "cos"   -> `	fcos\n`
+  | "log"   -> `	fldln2\n\tfxch\n\tfyl2x\n`
+  | "log10" -> `	fldlg2\n\tfxch\n\tfyl2x\n`
+  | "sin"   -> `	fsin\n`
+  | "sqrt"  -> `	fsqrt\n`
+  | "tan"   -> `	fptan\n\tfstp st(0)\n`
+  | _ -> assert false
+
 (* Output the assembly code for an instruction *)
 
 (* Name of current function *)
@@ -257,8 +350,6 @@ let range_check_trap = ref 0
 
 let float_constants = ref ([] : (int * string) list)
 
-let tos = phys_reg 100
-
 let emit_instr i =
     match i.desc with
       Lend -> ()
@@ -266,8 +357,10 @@ let emit_instr i =
         let src = i.arg.(0) and dst = i.res.(0) in
         if src.loc <> dst.loc then begin
           if src.typ = Float then
-            if src = tos then
-              `	fstp	{emit_reg dst}\n`
+            if is_tos src then
+              `	fst{pop_suffix i}	{emit_reg dst}\n`
+            else if is_tos dst then
+              `	fld	{emit_reg dst}\n`
             else begin
               `	fld	{emit_reg src}\n`;
               `	fstp	{emit_reg dst}\n`
@@ -331,6 +424,8 @@ let emit_instr i =
     | Lop(Iload(chunk, addr)) ->
         let dest = i.res.(0) in
         begin match chunk with
+          | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
+            `	mov	{emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
           | Byte_unsigned ->
               `	movzx	{emit_reg dest}, BYTE PTR {emit_addressing addr i.arg 0}\n`
           | Byte_signed ->
@@ -343,8 +438,6 @@ let emit_instr i =
             `	fld	REAL4 PTR {emit_addressing addr i.arg 0}\n`
           | Double | Double_u ->
             `	fld	REAL8 PTR {emit_addressing addr i.arg 0}\n`
-          | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
-            `	mov	{emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
         end
     | Lop(Istore(chunk, addr)) ->
         begin match chunk with
@@ -355,15 +448,15 @@ let emit_instr i =
           | Sixteen_unsigned | Sixteen_signed ->
             `	mov	WORD PTR {emit_addressing addr i.arg 1}, {emit_reg16 i.arg.(0)}\n`
           | Single ->
-              if i.arg.(0) = tos then
-                `	fstp	REAL4 PTR {emit_addressing addr i.arg 1}\n`
+              if is_tos i.arg.(0) then
+                `	fst{pop_suffix i}	REAL4 PTR {emit_addressing addr i.arg 1}\n`
               else begin
                 `	fld	{emit_reg i.arg.(0)}\n`;
                 `	fstp	REAL4 PTR {emit_addressing addr i.arg 1}\n`
               end
           | Double | Double_u ->
-              if i.arg.(0) = tos then
-                `	fstp	REAL8 PTR {emit_addressing addr i.arg 1}\n`
+              if is_tops i.arg.(0) then
+                `	fst{pop_suffix i}	REAL8 PTR {emit_addressing addr i.arg 1}\n`
               else begin
                 `	fld	{emit_reg i.arg.(0)}\n`;
                 `	fstp	REAL8 PTR {emit_addressing addr i.arg 1}\n`
@@ -421,12 +514,12 @@ let emit_instr i =
     | Lop(Iintop op) ->
         (* We have i.arg.(0) = i.res.(0) *)
         `	{emit_string(instr_for_intop op)}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
+    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
+        `	lea	{emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
     | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
         `	inc	{emit_reg i.res.(0)}\n`
     | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
         `	dec	{emit_reg i.res.(0)}\n`
-    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
-        `	lea	{emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
     | Lop(Iintop_imm(Idiv, n)) ->
         let l = Misc.log2 n in
         let lbl = new_label() in
@@ -447,21 +540,22 @@ let emit_instr i =
         (* We have i.arg.(0) = i.res.(0) *)
         `	{emit_string(instr_for_intop op)}	{emit_reg i.res.(0)}, {emit_int n}\n`
     | Lop(Inegf | Iabsf as floatop) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
           `	fld	{emit_reg i.arg.(0)}\n`;
         `	{emit_string(instr_for_floatop floatop)}\n`
     | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
           as floatop) ->
-        if i.arg.(0) = tos && i.arg.(1) = tos then
+        begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
+          (true, true) ->
           (* both operands on top of FP stack *)
-          `	{emit_string(instr_for_floatop_reversed floatop)}\n`
-        else if i.arg.(0) = tos then
+          `	{emit_string(instr_for_floatop_pop floatop)}\n`
+        | (true, false) ->
           (* first operand on stack *)
           `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
-        else if i.arg.(1) = tos then
+        | (false, true) ->
           (* second operand on stack *)
           `	{emit_string(instr_for_floatop_reversed floatop)}	{emit_reg i.arg.(0)}\n`
-        else begin
+        | (false, false) ->
           (* both operands in memory *)
           `	fld	{emit_reg i.arg.(0)}\n`;
           `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
@@ -476,7 +570,7 @@ let emit_instr i =
             `	add	esp, 4\n`
         end
     | Lop(Iintoffloat) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
           `	fld	{emit_reg i.arg.(0)}\n`;
         stack_offset := !stack_offset - 8;
         `	sub	esp, 8\n`;
@@ -487,9 +581,9 @@ let emit_instr i =
         `	fldcw	[esp]\n`;
         begin match i.res.(0).loc with
           Stack s ->
-            `	fistp	{emit_reg i.res.(0)}\n`
+            `	fist{pop_suffix i}	{emit_reg i.res.(0)}\n`
         | _ ->
-            `	fistp	DWORD PTR [esp]\n`;
+            `	fist{pop_suffix i}	DWORD PTR [esp]\n`;
             `	mov	{emit_reg i.res.(0)}, [esp]\n`
         end;
         `	fldcw	[esp+4]\n`;
@@ -537,10 +631,19 @@ let emit_instr i =
         `	push	DWORD PTR {emit_addressing addr i.arg 0}\n`;
         stack_offset := !stack_offset + 8
     | Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
           `	fld	{emit_reg i.arg.(0)}\n`;
         let size = if double then "REAL8" else "REAL4" in
         `	{emit_string(instr_for_floatarithmem op)}	{emit_string size} PTR {emit_addressing addr i.arg 1}\n`
+    | Lop(Ispecific(Ifloatspecial s)) ->
+        (* Push args on float stack if necessary *)
+        for k = 0 to Array.length i.arg - 1 do
+          if not (is_tos i.arg.(k)) then `	fld	{emit_reg i.arg.(k)}\n`
+        done;
+        (* Fix-up for binary instrs whose args were swapped *)
+        if Array.length i.arg = 2 && is_tos i.arg.(1) then
+          `	fxch	st(1)\n`;
+        emit_floatspecial s
     | Lreloadretaddr ->
         ()
     | Lreturn ->
@@ -571,70 +674,8 @@ let emit_instr i =
             `	cmp	{emit_reg i.arg.(0)}, {emit_int n}\n`;
             let b = name_for_cond_branch cmp in
             `	j{emit_string b}	{emit_label lbl}\n`
-        | Ifloattest((Ceq | Cne as cmp), neg) ->
-            if i.arg.(1) <> tos then
-              `	fld	{emit_reg i.arg.(1)}\n`;
-            if i.arg.(0) <> tos then
-              `	fld	{emit_reg i.arg.(0)}\n`;
-            `	fucompp\n`;
-            `	fnstsw	ax\n`;
-            let neg1 = if cmp = Ceq then neg else not neg in
-            if neg1 then begin          (* branch if different *)
-              `	and	ah, 68\n`;
-              `	xor	ah, 64\n`;
-              `	jne	{emit_label lbl}\n`
-            end else begin              (* branch if equal *)
-              `	and	ah, 69\n`;
-              `	cmp	ah, 64\n`;
-              `	je	{emit_label lbl}\n`
-            end
         | Ifloattest(cmp, neg) ->
-            let actual_cmp =
-              if i.arg.(0) = tos && i.arg.(1) = tos then begin
-                (* both args on top of FP stack *)
-                `	fcompp\n`;
-                cmp
-              end else if i.arg.(0) = tos then begin
-                (* first arg on top of FP stack *)
-                `	fcomp	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end else if i.arg.(1) = tos then begin
-                (* second arg on top of FP stack *)
-                `	fcomp	{emit_reg i.arg.(0)}\n`;
-                Cmm.swap_comparison cmp
-              end else begin
-                `	fld	{emit_reg i.arg.(0)}\n`;
-                `	fcomp	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end in
-            `	fnstsw	ax\n`;
-            begin match actual_cmp with
-              Cle ->
-                `	and	ah, 69\n`;
-                `	dec	ah\n`;
-                `	cmp	ah, 64\n`;
-                if neg
-                then `	jae	`
-                else `	jb	`
-            | Cge ->
-                `	and	ah, 5\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Clt ->
-                `	and	ah, 69\n`;
-                `	cmp	ah, 1\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Cgt ->
-                `	and	ah, 69\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | _ -> fatal_error "Emit_i386: floattest"
-            end;
-            `{emit_label lbl}\n`
+            emit_float_test cmp neg i.arg lbl
         | Ioddtest ->
             `	test	{emit_reg i.arg.(0)}, 1\n`;
             `	jne	{emit_label lbl}\n`
diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml
index b36c5b23c..d03b121ad 100644
--- a/asmcomp/i386/proc.ml
+++ b/asmcomp/i386/proc.ml
@@ -47,7 +47,7 @@ let register_class r =
   | Addr -> 0
   | Float -> 1
 
-let num_available_registers = [| 7; 0 |]
+let num_available_registers = [| 7; 1 |]
 
 let first_available_register = [| 0; 100 |]
 
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
index d6a5ef115..f2f79ca5e 100644
--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@@ -73,14 +73,28 @@ let rec select_addr exp =
   | arg ->
       (Alinear arg, 0)
     
+(* C functions to be turned into Ifloatspecial instructions if -ffast-math *)
+
+let inline_float_ops =
+  ["atan"; "atan2"; "cos"; "log"; "log10"; "sin"; "sqrt"; "tan"]
+
 (* Estimate number of float temporaries needed to evaluate expression
    (Ershov's algorithm) *)
 
 let rec float_needs = function
-    Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
+    Cop((Cnegf | Cabsf), [arg]) ->
+      float_needs arg
+  | Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
       let n1 = float_needs arg1 in
       let n2 = float_needs arg2 in
       if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
+  | Cop(Cextcall(fn, ty_res, alloc), args)
+    when !fast_math && List.mem fn inline_float_ops ->
+      begin match args with
+        [arg] -> float_needs arg
+      | [arg1; arg2] -> max (float_needs arg2 + 1) (float_needs arg1)
+      | _ -> assert false
+      end
   | _ ->
       1
 
@@ -119,7 +133,7 @@ let pseudoregs_for_operation op arg res =
      the result is always left at the top of the floating-point stack *)
   | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
   | Ifloatofint | Iload((Single | Double | Double_u), _)
-  | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _)) ->
+  | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _) | Ifloatspecial _) ->
       (arg, [| tos |], false)           (* don't move it immediately *)
   (* For storing a byte, the argument must be in eax...edx.
      (But for a short, any reg will do!)
@@ -215,6 +229,11 @@ method select_operation op args =
       | _ ->
           super#select_operation op args
       end
+  (* Recognize inlined floating point operations *)
+  | Cextcall(fn, ty_res, false)
+    when !fast_math && List.mem fn inline_float_ops ->
+      (Ispecific(Ifloatspecial fn), args)
+  (* Default *)
   | _ -> super#select_operation op args
 
 (* Recognize float arithmetic with mem *)
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
index 2138d7da4..4ea85fa51 100644
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@@ -76,6 +76,13 @@ let size_expr env exp =
         fatal_error "Selection.size_expr"
   in size Tbl.empty exp
 
+(* These are C library functions that are known to be pure
+   (no side effects at all) and worth not pre-computing. *)
+
+let pure_external_functions =
+  ["acos"; "asin"; "atan"; "atan2"; "cos"; "exp"; "log";
+   "log10"; "sin"; "sqrt"; "tan"]
+
 (* Says if an expression is "simple". A "simple" expression has no
    side-effects and its execution can be delayed until its value
    is really needed. In the case of e.g. an [alloc] instruction,
@@ -97,9 +104,15 @@ let rec is_simple_expr = function
   | Cop(op, args) ->
       begin match op with
         (* The following may have side effects *)
-        Capply _ | Cextcall(_, _, _) | Calloc | Cstore _ | Craise -> false
+      | Capply _ | Calloc | Cstore _ | Craise -> false
+        (* External C functions normally have side effects, unless known *)
+      | Cextcall(fn, _, alloc) ->
+          not alloc &&
+          List.mem fn pure_external_functions &&
+          List.for_all is_simple_expr args
         (* The remaining operations are simple if their args are *)
-      | _ -> List.for_all is_simple_expr args
+      | _ ->
+          List.for_all is_simple_expr args
       end
   | _ -> false