summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--asmcomp/i386/arch.ml15
-rw-r--r--asmcomp/i386/emit.mlp212
-rw-r--r--asmcomp/i386/emit_nt.mlp211
-rw-r--r--asmcomp/i386/proc.ml2
-rw-r--r--asmcomp/i386/selection.ml23
-rw-r--r--asmcomp/selectgen.ml17
6 files changed, 303 insertions, 177 deletions
diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml
index e391df4a6..bcbfb5f3f 100644
--- a/asmcomp/i386/arch.ml
+++ b/asmcomp/i386/arch.ml
@@ -14,7 +14,11 @@
(* Machine-specific command-line options *)
-let command_line_options = []
+let fast_math = ref false
+
+let command_line_options =
+ [ "-ffast-math", Arg.Set fast_math,
+ " Inline trigonometric and exponential functions" ]
(* Specific operations for the Intel 386 processor *)
@@ -42,6 +46,8 @@ type specific_operation =
| Ifloatarithmem of bool * float_operation * addressing_mode
(* Float arith operation with memory *)
(* bool: true=64 bits, false=32 *)
+ | Ifloatspecial of string
+
and float_operation =
Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev
@@ -132,3 +138,10 @@ let print_specific_operation printreg op ppf arg =
let long = if double then "float64" else "float32" in
fprintf ppf "%a %s %s[%a]" printreg arg.(0) (op_name op) long
(print_addressing printreg addr) (Array.sub arg 1 (Array.length arg - 1))
+ | Ifloatspecial name ->
+ fprintf ppf "%s " name;
+ for i = 0 to Array.length arg - 1 do
+ if i > 0 then fprintf ppf ", ";
+ printreg ppf arg.(i)
+ done
+
diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp
index e7e4e769c..821928e23 100644
--- a/asmcomp/i386/emit.mlp
+++ b/asmcomp/i386/emit.mlp
@@ -272,6 +272,99 @@ let output_epilogue () =
let n = frame_size() - 4 in
if n > 0 then ` addl ${emit_int n}, %esp\n`
+(* Determine if the given register is the top of the floating-point stack *)
+
+let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
+
+(* Emit a "p" suffix if TOS is dead after the given instruction *)
+
+let pop_suffix i =
+ let r = i.arg.(0) in
+ if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
+
+(* Emit the code for a floating-point comparison *)
+
+let emit_float_test cmp neg arg lbl =
+ let actual_cmp =
+ match (is_tos arg.(0), is_tos arg.(1)) with
+ (true, true) ->
+ (* both args on top of FP stack *)
+ ` fcompp\n`;
+ cmp
+ | (true, false) ->
+ (* first arg on top of FP stack *)
+ ` fcompl {emit_reg arg.(1)}\n`;
+ cmp
+ | (false, true) ->
+ (* second arg on top of FP stack *)
+ ` fcompl {emit_reg arg.(0)}\n`;
+ Cmm.swap_comparison cmp
+ | (false, false) ->
+ ` fldl {emit_reg arg.(0)}\n`;
+ ` fcompl {emit_reg arg.(1)}\n`;
+ cmp
+ in
+ ` fnstsw %ax\n`;
+ begin match actual_cmp with
+ Ceq ->
+ if neg then begin
+ ` andb $68, %ah\n`;
+ ` xorb $64, %ah\n`;
+ ` jne `
+ end else begin
+ ` andb $69, %ah\n`;
+ ` cmpb $64, %ah\n`;
+ ` je `
+ end
+ | Cne ->
+ if neg then begin
+ ` andb $69, %ah\n`;
+ ` cmpb $64, %ah\n`;
+ ` je `
+ end else begin
+ ` andb $68, %ah\n`;
+ ` xorb $64, %ah\n`;
+ ` jne `
+ end
+ | Cle ->
+ ` andb $69, %ah\n`;
+ ` decb %ah\n`;
+ ` cmpb $64, %ah\n`;
+ if neg
+ then ` jae `
+ else ` jb `
+ | Cge ->
+ ` andb $5, %ah\n`;
+ if neg
+ then ` jne `
+ else ` je `
+ | Clt ->
+ ` andb $69, %ah\n`;
+ ` cmpb $1, %ah\n`;
+ if neg
+ then ` jne `
+ else ` je `
+ | Cgt ->
+ ` andb $69, %ah\n`;
+ if neg
+ then ` jne `
+ else ` je `
+ end;
+ `{emit_label lbl}\n`
+
+(* Emit a Ifloatspecial instruction *)
+
+let emit_floatspecial = function
+ "atan" -> ` fldl1; fpatan\n`
+ | "atan2" -> ` fpatan\n`
+ | "cos" -> ` fcos\n`
+ | "log" -> ` fldln2; fxch; fyl2x\n`
+ | "log10" -> ` fldlg2; fxch; fyl2x\n`
+ | "sin" -> ` fsin\n`
+ | "sqrt" -> ` fsqrt\n`
+ | "tan" -> ` fptan; fstp %st(0)\n`
+ | _ -> assert false
+
(* Output the assembly code for an instruction *)
(* Name of current function *)
@@ -280,12 +373,9 @@ let function_name = ref ""
let tailrec_entry_point = ref 0
(* Label of trap for out-of-range accesses *)
let range_check_trap = ref 0
-
+(* Record float literals to be emitted later *)
let float_constants = ref ([] : (int * string) list)
-let tos = phys_reg 100
-
-
let emit_instr fallthrough i =
match i.desc with
Lend -> ()
@@ -293,8 +383,10 @@ let emit_instr fallthrough i =
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
if src.typ = Float then
- if src = tos then
- ` fstpl {emit_reg dst}\n`
+ if is_tos src then
+ ` fst{pop_suffix i}l {emit_reg dst}\n`
+ else if is_tos dst then
+ ` fldl {emit_reg src}\n`
else begin
` fldl {emit_reg src}\n`;
` fstpl {emit_reg dst}\n`
@@ -354,6 +446,8 @@ let emit_instr fallthrough i =
| Lop(Iload(chunk, addr)) ->
let dest = i.res.(0) in
begin match chunk with
+ | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
+ ` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
| Byte_unsigned ->
` movzbl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
| Byte_signed ->
@@ -366,8 +460,6 @@ let emit_instr fallthrough i =
` flds {emit_addressing addr i.arg 0}\n`
| Double | Double_u ->
` fldl {emit_addressing addr i.arg 0}\n`
- | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
- ` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
end
| Lop(Istore(chunk, addr)) ->
begin match chunk with
@@ -378,15 +470,15 @@ let emit_instr fallthrough i =
| Sixteen_unsigned | Sixteen_signed ->
` movw {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
| Single ->
- if i.arg.(0) = tos then
- ` fstps {emit_addressing addr i.arg 1}\n`
+ if is_tos i.arg.(0) then
+ ` fst{pop_suffix i}s {emit_addressing addr i.arg 1}\n`
else begin
` fldl {emit_reg i.arg.(0)}\n`;
` fstps {emit_addressing addr i.arg 1}\n`
end
| Double | Double_u ->
- if i.arg.(0) = tos then
- ` fstpl {emit_addressing addr i.arg 1}\n`
+ if is_tos i.arg.(0) then
+ ` fst{pop_suffix i}l {emit_addressing addr i.arg 1}\n`
else begin
` fldl {emit_reg i.arg.(0)}\n`;
` fstpl {emit_addressing addr i.arg 1}\n`
@@ -444,12 +536,12 @@ let emit_instr fallthrough i =
| Lop(Iintop op) ->
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
+ | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
+ ` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
` incl {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
` decl {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
- ` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Idiv, n)) ->
let l = Misc.log2 n in
let lbl = new_label() in
@@ -470,21 +562,22 @@ let emit_instr fallthrough i =
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n`
| Lop(Inegf | Iabsf as floatop) ->
- if i.arg.(0) <> tos then
+ if not (is_tos i.arg.(0)) then
` fldl {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
as floatop) ->
- if i.arg.(0) = tos && i.arg.(1) = tos then
+ begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
+ (true, true) ->
(* both operands on top of FP stack *)
` {emit_string(instr_for_floatop_pop floatop)} %st, %st(1)\n`
- else if i.arg.(0) = tos then
+ | (true, false) ->
(* first operand on stack *)
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
- else if i.arg.(1) = tos then
+ | (false, true) ->
(* second operand on stack *)
` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
- else begin
+ | (false, false) ->
(* both operands in memory *)
` fldl {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
@@ -499,7 +592,7 @@ let emit_instr fallthrough i =
` addl $4, %esp\n`
end
| Lop(Iintoffloat) ->
- if i.arg.(0) <> tos then
+ if not (is_tos i.arg.(0)) then
` fldl {emit_reg i.arg.(0)}\n`;
stack_offset := !stack_offset - 8;
` subl $8, %esp\n`;
@@ -510,9 +603,9 @@ let emit_instr fallthrough i =
` fldcw (%esp)\n`;
begin match i.res.(0).loc with
Stack s ->
- ` fistpl {emit_reg i.res.(0)}\n`
+ ` fist{pop_suffix i}l {emit_reg i.res.(0)}\n`
| _ ->
- ` fistpl (%esp)\n`;
+ ` fist{pop_suffix i}l (%esp)\n`;
` movl (%esp), {emit_reg i.res.(0)}\n`
end;
` fldcw 4(%esp)\n`;
@@ -558,9 +651,18 @@ let emit_instr fallthrough i =
` pushl {emit_addressing addr i.arg 0}\n`;
stack_offset := !stack_offset + 8
| Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
- if i.arg.(0) <> tos then
+ if not (is_tos i.arg.(0)) then
` fldl {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatarithmem double op)} {emit_addressing addr i.arg 1}\n`
+ | Lop(Ispecific(Ifloatspecial s)) ->
+ (* Push args on float stack if necessary *)
+ for k = 0 to Array.length i.arg - 1 do
+ if not (is_tos i.arg.(k)) then ` fldl {emit_reg i.arg.(k)}\n`
+ done;
+ (* Fix-up for binary instrs whose args were swapped *)
+ if Array.length i.arg = 2 && is_tos i.arg.(1) then
+ ` fxch %st(1)\n`;
+ emit_floatspecial s
| Lreloadretaddr ->
()
| Lreturn ->
@@ -591,70 +693,8 @@ let emit_instr fallthrough i =
` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
let b = name_for_cond_branch cmp in
` j{emit_string b} {emit_label lbl}\n`
- | Ifloattest((Ceq | Cne as cmp), neg) ->
- if i.arg.(1) <> tos then
- ` fldl {emit_reg i.arg.(1)}\n`;
- if i.arg.(0) <> tos then
- ` fldl {emit_reg i.arg.(0)}\n`;
- ` fucompp\n`;
- ` fnstsw %ax\n`;
- let neg1 = if cmp = Ceq then neg else not neg in
- if neg1 then begin (* branch if different *)
- ` andb $68, %ah\n`;
- ` xorb $64, %ah\n`;
- ` jne {emit_label lbl}\n`
- end else begin (* branch if equal *)
- ` andb $69, %ah\n`;
- ` cmpb $64, %ah\n`;
- ` je {emit_label lbl}\n`
- end
| Ifloattest(cmp, neg) ->
- let actual_cmp =
- if i.arg.(0) = tos && i.arg.(1) = tos then begin
- (* both args on top of FP stack *)
- ` fcompp\n`;
- cmp
- end else if i.arg.(0) = tos then begin
- (* first arg on top of FP stack *)
- ` fcompl {emit_reg i.arg.(1)}\n`;
- cmp
- end else if i.arg.(1) = tos then begin
- (* second arg on top of FP stack *)
- ` fcompl {emit_reg i.arg.(0)}\n`;
- Cmm.swap_comparison cmp
- end else begin
- ` fldl {emit_reg i.arg.(0)}\n`;
- ` fcompl {emit_reg i.arg.(1)}\n`;
- cmp
- end in
- ` fnstsw %ax\n`;
- begin match actual_cmp with
- Cle ->
- ` andb $69, %ah\n`;
- ` decb %ah\n`;
- ` cmpb $64, %ah\n`;
- if neg
- then ` jae `
- else ` jb `
- | Cge ->
- ` andb $5, %ah\n`;
- if neg
- then ` jne `
- else ` je `
- | Clt ->
- ` andb $69, %ah\n`;
- ` cmpb $1, %ah\n`;
- if neg
- then ` jne `
- else ` je `
- | Cgt ->
- ` andb $69, %ah\n`;
- if neg
- then ` jne `
- else ` je `
- | _ -> fatal_error "Emit_i386: floattest"
- end;
- `{emit_label lbl}\n`
+ emit_float_test cmp neg i.arg lbl
| Ioddtest ->
` testl $1, {emit_reg i.arg.(0)}\n`;
` jne {emit_label lbl}\n`
diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp
index b67b68336..57dd69a8e 100644
--- a/asmcomp/i386/emit_nt.mlp
+++ b/asmcomp/i386/emit_nt.mlp
@@ -246,6 +246,99 @@ let output_epilogue () =
let n = frame_size() - 4 in
if n > 0 then ` add esp, {emit_int n}\n`
+(* Determine if the given register is the top of the floating-point stack *)
+
+let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
+
+(* Emit a "p" suffix if TOS is dead after the given instruction *)
+
+let pop_suffix i =
+ let r = i.arg.(0) in
+ if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
+
+(* Emit the code for a floating-point comparison *)
+
+let emit_float_test cmp neg arg lbl =
+ let actual_cmp =
+ match (is_tos arg.(0), is_tos arg.(1)) with
+ (true, true) ->
+ (* both args on top of FP stack *)
+ ` fcompp\n`;
+ cmp
+ | (true, false) ->
+ (* first arg on top of FP stack *)
+ ` fcompl {emit_reg arg.(1)}\n`;
+ cmp
+ | (false, true) ->
+ (* second arg on top of FP stack *)
+ ` fcompl {emit_reg arg.(0)}\n`;
+ Cmm.swap_comparison cmp
+ | (false, false) ->
+ ` fldl {emit_reg arg.(0)}\n`;
+ ` fcompl {emit_reg arg.(1)}\n`;
+ cmp
+ in
+ ` fnstsw %ax\n`;
+ begin match actual_cmp with
+ Ceq ->
+ if neg then begin
+ ` and ah, 68\n`;
+ ` xor ah, 64\n`;
+ ` jne `
+ end else begin
+ ` and ah, 69\n`;
+ ` cmp ah, 64\n`;
+ ` je `
+ end
+ | Cne ->
+ if neg then begin
+ ` and ah, 69\n`;
+ ` cmp ah, 64\n`;
+ ` je `
+ end else begin
+ ` and ah, 68\n`;
+ ` xor ah, 64\n`;
+ ` jne `
+ end
+ | Cle ->
+ ` and ah, 69\n`;
+ ` dec ah\n`;
+ ` cmp ah, 64\n`;
+ if neg
+ then ` jae `
+ else ` jb `
+ | Cge ->
+ ` and ah, 5\n`;
+ if neg
+ then ` jne `
+ else ` je `
+ | Clt ->
+ ` and ah, 69\n`;
+ ` cmp ah, 1\n`;
+ if neg
+ then ` jne `
+ else ` je `
+ | Cgt ->
+ ` and ah, 69\n`;
+ if neg
+ then ` jne `
+ else ` je `
+ end;
+ `{emit_label lbl}\n`
+
+(* Emit a Ifloatspecial instruction *)
+
+let emit_floatspecial = function
+ "atan" -> ` fldl1\n\tfpatan\n`
+ | "atan2" -> ` fpatan\n`
+ | "cos" -> ` fcos\n`
+ | "log" -> ` fldln2\n\tfxch\n\tfyl2x\n`
+ | "log10" -> ` fldlg2\n\tfxch\n\tfyl2x\n`
+ | "sin" -> ` fsin\n`
+ | "sqrt" -> ` fsqrt\n`
+ | "tan" -> ` fptan\n\tfstp st(0)\n`
+ | _ -> assert false
+
(* Output the assembly code for an instruction *)
(* Name of current function *)
@@ -257,8 +350,6 @@ let range_check_trap = ref 0
let float_constants = ref ([] : (int * string) list)
-let tos = phys_reg 100
-
let emit_instr i =
match i.desc with
Lend -> ()
@@ -266,8 +357,10 @@ let emit_instr i =
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
if src.typ = Float then
- if src = tos then
- ` fstp {emit_reg dst}\n`
+ if is_tos src then
+ ` fst{pop_suffix i} {emit_reg dst}\n`
+ else if is_tos dst then
+ ` fld {emit_reg dst}\n`
else begin
` fld {emit_reg src}\n`;
` fstp {emit_reg dst}\n`
@@ -331,6 +424,8 @@ let emit_instr i =
| Lop(Iload(chunk, addr)) ->
let dest = i.res.(0) in
begin match chunk with
+ | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
+ ` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
| Byte_unsigned ->
` movzx {emit_reg dest}, BYTE PTR {emit_addressing addr i.arg 0}\n`
| Byte_signed ->
@@ -343,8 +438,6 @@ let emit_instr i =
` fld REAL4 PTR {emit_addressing addr i.arg 0}\n`
| Double | Double_u ->
` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`
- | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
- ` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
end
| Lop(Istore(chunk, addr)) ->
begin match chunk with
@@ -355,15 +448,15 @@ let emit_instr i =
| Sixteen_unsigned | Sixteen_signed ->
` mov WORD PTR {emit_addressing addr i.arg 1}, {emit_reg16 i.arg.(0)}\n`
| Single ->
- if i.arg.(0) = tos then
- ` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n`
+ if is_tos i.arg.(0) then
+ ` fst{pop_suffix i} REAL4 PTR {emit_addressing addr i.arg 1}\n`
else begin
` fld {emit_reg i.arg.(0)}\n`;
` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n`
end
| Double | Double_u ->
- if i.arg.(0) = tos then
- ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
+ if is_tops i.arg.(0) then
+ ` fst{pop_suffix i} REAL8 PTR {emit_addressing addr i.arg 1}\n`
else begin
` fld {emit_reg i.arg.(0)}\n`;
` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
@@ -421,12 +514,12 @@ let emit_instr i =
| Lop(Iintop op) ->
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
+ | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
+ ` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
| Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
` inc {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
` dec {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
- ` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
| Lop(Iintop_imm(Idiv, n)) ->
let l = Misc.log2 n in
let lbl = new_label() in
@@ -447,21 +540,22 @@ let emit_instr i =
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n`
| Lop(Inegf | Iabsf as floatop) ->
- if i.arg.(0) <> tos then
+ if not (is_tos i.arg.(0)) then
` fld {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
as floatop) ->
- if i.arg.(0) = tos && i.arg.(1) = tos then
+ begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
+ (true, true) ->
(* both operands on top of FP stack *)
- ` {emit_string(instr_for_floatop_reversed floatop)}\n`
- else if i.arg.(0) = tos then
+ ` {emit_string(instr_for_floatop_pop floatop)}\n`
+ | (true, false) ->
(* first operand on stack *)
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
- else if i.arg.(1) = tos then
+ | (false, true) ->
(* second operand on stack *)
` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
- else begin
+ | (false, false) ->
(* both operands in memory *)
` fld {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
@@ -476,7 +570,7 @@ let emit_instr i =
` add esp, 4\n`
end
| Lop(Iintoffloat) ->
- if i.arg.(0) <> tos then
+ if not (is_tos i.arg.(0)) then
` fld {emit_reg i.arg.(0)}\n`;
stack_offset := !stack_offset - 8;
` sub esp, 8\n`;
@@ -487,9 +581,9 @@ let emit_instr i =
` fldcw [esp]\n`;
begin match i.res.(0).loc with
Stack s ->
- ` fistp {emit_reg i.res.(0)}\n`
+ ` fist{pop_suffix i} {emit_reg i.res.(0)}\n`
| _ ->
- ` fistp DWORD PTR [esp]\n`;
+ ` fist{pop_suffix i} DWORD PTR [esp]\n`;
` mov {emit_reg i.res.(0)}, [esp]\n`
end;
` fldcw [esp+4]\n`;
@@ -537,10 +631,19 @@ let emit_instr i =
` push DWORD PTR {emit_addressing addr i.arg 0}\n`;
stack_offset := !stack_offset + 8
| Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
- if i.arg.(0) <> tos then
+ if not (is_tos i.arg.(0)) then
` fld {emit_reg i.arg.(0)}\n`;
let size = if double then "REAL8" else "REAL4" in
` {emit_string(instr_for_floatarithmem op)} {emit_string size} PTR {emit_addressing addr i.arg 1}\n`
+ | Lop(Ispecific(Ifloatspecial s)) ->
+ (* Push args on float stack if necessary *)
+ for k = 0 to Array.length i.arg - 1 do
+ if not (is_tos i.arg.(k)) then ` fld {emit_reg i.arg.(k)}\n`
+ done;
+ (* Fix-up for binary instrs whose args were swapped *)
+ if Array.length i.arg = 2 && is_tos i.arg.(1) then
+ ` fxch st(1)\n`;
+ emit_floatspecial s
| Lreloadretaddr ->
()
| Lreturn ->
@@ -571,70 +674,8 @@ let emit_instr i =
` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`;
let b = name_for_cond_branch cmp in
` j{emit_string b} {emit_label lbl}\n`
- | Ifloattest((Ceq | Cne as cmp), neg) ->
- if i.arg.(1) <> tos then
- ` fld {emit_reg i.arg.(1)}\n`;
- if i.arg.(0) <> tos then
- ` fld {emit_reg i.arg.(0)}\n`;
- ` fucompp\n`;
- ` fnstsw ax\n`;
- let neg1 = if cmp = Ceq then neg else not neg in
- if neg1 then begin (* branch if different *)
- ` and ah, 68\n`;
- ` xor ah, 64\n`;
- ` jne {emit_label lbl}\n`
- end else begin (* branch if equal *)
- ` and ah, 69\n`;
- ` cmp ah, 64\n`;
- ` je {emit_label lbl}\n`
- end
| Ifloattest(cmp, neg) ->
- let actual_cmp =
- if i.arg.(0) = tos && i.arg.(1) = tos then begin
- (* both args on top of FP stack *)
- ` fcompp\n`;
- cmp
- end else if i.arg.(0) = tos then begin
- (* first arg on top of FP stack *)
- ` fcomp {emit_reg i.arg.(1)}\n`;
- cmp
- end else if i.arg.(1) = tos then begin
- (* second arg on top of FP stack *)
- ` fcomp {emit_reg i.arg.(0)}\n`;
- Cmm.swap_comparison cmp
- end else begin
- ` fld {emit_reg i.arg.(0)}\n`;
- ` fcomp {emit_reg i.arg.(1)}\n`;
- cmp
- end in
- ` fnstsw ax\n`;
- begin match actual_cmp with
- Cle ->
- ` and ah, 69\n`;
- ` dec ah\n`;
- ` cmp ah, 64\n`;
- if neg
- then ` jae `
- else ` jb `
- | Cge ->
- ` and ah, 5\n`;
- if neg
- then ` jne `
- else ` je `
- | Clt ->
- ` and ah, 69\n`;
- ` cmp ah, 1\n`;
- if neg
- then ` jne `
- else ` je `
- | Cgt ->
- ` and ah, 69\n`;
- if neg
- then ` jne `
- else ` je `
- | _ -> fatal_error "Emit_i386: floattest"
- end;
- `{emit_label lbl}\n`
+ emit_float_test cmp neg i.arg lbl
| Ioddtest ->
` test {emit_reg i.arg.(0)}, 1\n`;
` jne {emit_label lbl}\n`
diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml
index b36c5b23c..d03b121ad 100644
--- a/asmcomp/i386/proc.ml
+++ b/asmcomp/i386/proc.ml
@@ -47,7 +47,7 @@ let register_class r =
| Addr -> 0
| Float -> 1
-let num_available_registers = [| 7; 0 |]
+let num_available_registers = [| 7; 1 |]
let first_available_register = [| 0; 100 |]
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
index d6a5ef115..f2f79ca5e 100644
--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@@ -73,14 +73,28 @@ let rec select_addr exp =
| arg ->
(Alinear arg, 0)
+(* C functions to be turned into Ifloatspecial instructions if -ffast-math *)
+
+let inline_float_ops =
+ ["atan"; "atan2"; "cos"; "log"; "log10"; "sin"; "sqrt"; "tan"]
+
(* Estimate number of float temporaries needed to evaluate expression
(Ershov's algorithm) *)
let rec float_needs = function
- Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
+ Cop((Cnegf | Cabsf), [arg]) ->
+ float_needs arg
+ | Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
let n1 = float_needs arg1 in
let n2 = float_needs arg2 in
if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
+ | Cop(Cextcall(fn, ty_res, alloc), args)
+ when !fast_math && List.mem fn inline_float_ops ->
+ begin match args with
+ [arg] -> float_needs arg
+ | [arg1; arg2] -> max (float_needs arg2 + 1) (float_needs arg1)
+ | _ -> assert false
+ end
| _ ->
1
@@ -119,7 +133,7 @@ let pseudoregs_for_operation op arg res =
the result is always left at the top of the floating-point stack *)
| Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
| Ifloatofint | Iload((Single | Double | Double_u), _)
- | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _)) ->
+ | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _) | Ifloatspecial _) ->
(arg, [| tos |], false) (* don't move it immediately *)
(* For storing a byte, the argument must be in eax...edx.
(But for a short, any reg will do!)
@@ -215,6 +229,11 @@ method select_operation op args =
| _ ->
super#select_operation op args
end
+ (* Recognize inlined floating point operations *)
+ | Cextcall(fn, ty_res, false)
+ when !fast_math && List.mem fn inline_float_ops ->
+ (Ispecific(Ifloatspecial fn), args)
+ (* Default *)
| _ -> super#select_operation op args
(* Recognize float arithmetic with mem *)
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
index 2138d7da4..4ea85fa51 100644
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@@ -76,6 +76,13 @@ let size_expr env exp =
fatal_error "Selection.size_expr"
in size Tbl.empty exp
+(* These are C library functions that are known to be pure
+ (no side effects at all) and worth not pre-computing. *)
+
+let pure_external_functions =
+ ["acos"; "asin"; "atan"; "atan2"; "cos"; "exp"; "log";
+ "log10"; "sin"; "sqrt"; "tan"]
+
(* Says if an expression is "simple". A "simple" expression has no
side-effects and its execution can be delayed until its value
is really needed. In the case of e.g. an [alloc] instruction,
@@ -97,9 +104,15 @@ let rec is_simple_expr = function
| Cop(op, args) ->
begin match op with
(* The following may have side effects *)
- Capply _ | Cextcall(_, _, _) | Calloc | Cstore _ | Craise -> false
+ | Capply _ | Calloc | Cstore _ | Craise -> false
+ (* External C functions normally have side effects, unless known *)
+ | Cextcall(fn, _, alloc) ->
+ not alloc &&
+ List.mem fn pure_external_functions &&
+ List.for_all is_simple_expr args
(* The remaining operations are simple if their args are *)
- | _ -> List.for_all is_simple_expr args
+ | _ ->
+ List.for_all is_simple_expr args
end
| _ -> false