diff options
-rw-r--r-- | asmcomp/i386/arch.ml | 15 | ||||
-rw-r--r-- | asmcomp/i386/emit.mlp | 212 | ||||
-rw-r--r-- | asmcomp/i386/emit_nt.mlp | 211 | ||||
-rw-r--r-- | asmcomp/i386/proc.ml | 2 | ||||
-rw-r--r-- | asmcomp/i386/selection.ml | 23 | ||||
-rw-r--r-- | asmcomp/selectgen.ml | 17 |
6 files changed, 303 insertions, 177 deletions
diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml index e391df4a6..bcbfb5f3f 100644 --- a/asmcomp/i386/arch.ml +++ b/asmcomp/i386/arch.ml @@ -14,7 +14,11 @@ (* Machine-specific command-line options *) -let command_line_options = [] +let fast_math = ref false + +let command_line_options = + [ "-ffast-math", Arg.Set fast_math, + " Inline trigonometric and exponential functions" ] (* Specific operations for the Intel 386 processor *) @@ -42,6 +46,8 @@ type specific_operation = | Ifloatarithmem of bool * float_operation * addressing_mode (* Float arith operation with memory *) (* bool: true=64 bits, false=32 *) + | Ifloatspecial of string + and float_operation = Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev @@ -132,3 +138,10 @@ let print_specific_operation printreg op ppf arg = let long = if double then "float64" else "float32" in fprintf ppf "%a %s %s[%a]" printreg arg.(0) (op_name op) long (print_addressing printreg addr) (Array.sub arg 1 (Array.length arg - 1)) + | Ifloatspecial name -> + fprintf ppf "%s " name; + for i = 0 to Array.length arg - 1 do + if i > 0 then fprintf ppf ", "; + printreg ppf arg.(i) + done + diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp index e7e4e769c..821928e23 100644 --- a/asmcomp/i386/emit.mlp +++ b/asmcomp/i386/emit.mlp @@ -272,6 +272,99 @@ let output_epilogue () = let n = frame_size() - 4 in if n > 0 then ` addl ${emit_int n}, %esp\n` +(* Determine if the given register is the top of the floating-point stack *) + +let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false + +(* Emit a "p" suffix if TOS is dead after the given instruction *) + +let pop_suffix i = + let r = i.arg.(0) in + if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p" + +(* Emit the code for a floating-point comparison *) + +let emit_float_test cmp neg arg lbl = + let actual_cmp = + match (is_tos arg.(0), is_tos arg.(1)) with + (true, true) -> + (* both args on top of FP stack *) + ` fcompp\n`; + cmp + | (true, false) -> + (* first arg on top of FP stack *) + ` fcompl {emit_reg arg.(1)}\n`; + cmp + | (false, true) -> + (* second arg on top of FP stack *) + ` fcompl {emit_reg arg.(0)}\n`; + Cmm.swap_comparison cmp + | (false, false) -> + ` fldl {emit_reg arg.(0)}\n`; + ` fcompl {emit_reg arg.(1)}\n`; + cmp + in + ` fnstsw %ax\n`; + begin match actual_cmp with + Ceq -> + if neg then begin + ` andb $68, %ah\n`; + ` xorb $64, %ah\n`; + ` jne ` + end else begin + ` andb $69, %ah\n`; + ` cmpb $64, %ah\n`; + ` je ` + end + | Cne -> + if neg then begin + ` andb $69, %ah\n`; + ` cmpb $64, %ah\n`; + ` je ` + end else begin + ` andb $68, %ah\n`; + ` xorb $64, %ah\n`; + ` jne ` + end + | Cle -> + ` andb $69, %ah\n`; + ` decb %ah\n`; + ` cmpb $64, %ah\n`; + if neg + then ` jae ` + else ` jb ` + | Cge -> + ` andb $5, %ah\n`; + if neg + then ` jne ` + else ` je ` + | Clt -> + ` andb $69, %ah\n`; + ` cmpb $1, %ah\n`; + if neg + then ` jne ` + else ` je ` + | Cgt -> + ` andb $69, %ah\n`; + if neg + then ` jne ` + else ` je ` + end; + `{emit_label lbl}\n` + +(* Emit a Ifloatspecial instruction *) + +let emit_floatspecial = function + "atan" -> ` fldl1; fpatan\n` + | "atan2" -> ` fpatan\n` + | "cos" -> ` fcos\n` + | "log" -> ` fldln2; fxch; fyl2x\n` + | "log10" -> ` fldlg2; fxch; fyl2x\n` + | "sin" -> ` fsin\n` + | "sqrt" -> ` fsqrt\n` + | "tan" -> ` fptan; fstp %st(0)\n` + | _ -> assert false + (* Output the assembly code for an instruction *) (* Name of current function *) @@ -280,12 +373,9 @@ let function_name = ref "" let tailrec_entry_point = ref 0 (* Label of trap for out-of-range accesses *) let range_check_trap = ref 0 - +(* Record float literals to be emitted later *) let float_constants = ref ([] : (int * string) list) -let tos = phys_reg 100 - - let emit_instr fallthrough i = match i.desc with Lend -> () @@ -293,8 +383,10 @@ let emit_instr fallthrough i = let src = i.arg.(0) and dst = i.res.(0) in if src.loc <> dst.loc then begin if src.typ = Float then - if src = tos then - ` fstpl {emit_reg dst}\n` + if is_tos src then + ` fst{pop_suffix i}l {emit_reg dst}\n` + else if is_tos dst then + ` fldl {emit_reg src}\n` else begin ` fldl {emit_reg src}\n`; ` fstpl {emit_reg dst}\n` @@ -354,6 +446,8 @@ let emit_instr fallthrough i = | Lop(Iload(chunk, addr)) -> let dest = i.res.(0) in begin match chunk with + | Word | Thirtytwo_signed | Thirtytwo_unsigned -> + ` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n` | Byte_unsigned -> ` movzbl {emit_addressing addr i.arg 0}, {emit_reg dest}\n` | Byte_signed -> @@ -366,8 +460,6 @@ let emit_instr fallthrough i = ` flds {emit_addressing addr i.arg 0}\n` | Double | Double_u -> ` fldl {emit_addressing addr i.arg 0}\n` - | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) -> - ` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n` end | Lop(Istore(chunk, addr)) -> begin match chunk with @@ -378,15 +470,15 @@ let emit_instr fallthrough i = | Sixteen_unsigned | Sixteen_signed -> ` movw {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n` | Single -> - if i.arg.(0) = tos then - ` fstps {emit_addressing addr i.arg 1}\n` + if is_tos i.arg.(0) then + ` fst{pop_suffix i}s {emit_addressing addr i.arg 1}\n` else begin ` fldl {emit_reg i.arg.(0)}\n`; ` fstps {emit_addressing addr i.arg 1}\n` end | Double | Double_u -> - if i.arg.(0) = tos then - ` fstpl {emit_addressing addr i.arg 1}\n` + if is_tos i.arg.(0) then + ` fst{pop_suffix i}l {emit_addressing addr i.arg 1}\n` else begin ` fldl {emit_reg i.arg.(0)}\n`; ` fstpl {emit_addressing addr i.arg 1}\n` @@ -444,12 +536,12 @@ let emit_instr fallthrough i = | Lop(Iintop op) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` + | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc -> + ` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) -> ` incl {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> ` decl {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc -> - ` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Idiv, n)) -> let l = Misc.log2 n in let lbl = new_label() in @@ -470,21 +562,22 @@ let emit_instr fallthrough i = (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n` | Lop(Inegf | Iabsf as floatop) -> - if i.arg.(0) <> tos then + if not (is_tos i.arg.(0)) then ` fldl {emit_reg i.arg.(0)}\n`; ` {emit_string(instr_for_floatop floatop)}\n` | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev) as floatop) -> - if i.arg.(0) = tos && i.arg.(1) = tos then + begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with + (true, true) -> (* both operands on top of FP stack *) ` {emit_string(instr_for_floatop_pop floatop)} %st, %st(1)\n` - else if i.arg.(0) = tos then + | (true, false) -> (* first operand on stack *) ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` - else if i.arg.(1) = tos then + | (false, true) -> (* second operand on stack *) ` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n` - else begin + | (false, false) -> (* both operands in memory *) ` fldl {emit_reg i.arg.(0)}\n`; ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` @@ -499,7 +592,7 @@ let emit_instr fallthrough i = ` addl $4, %esp\n` end | Lop(Iintoffloat) -> - if i.arg.(0) <> tos then + if not (is_tos i.arg.(0)) then ` fldl {emit_reg i.arg.(0)}\n`; stack_offset := !stack_offset - 8; ` subl $8, %esp\n`; @@ -510,9 +603,9 @@ let emit_instr fallthrough i = ` fldcw (%esp)\n`; begin match i.res.(0).loc with Stack s -> - ` fistpl {emit_reg i.res.(0)}\n` + ` fist{pop_suffix i}l {emit_reg i.res.(0)}\n` | _ -> - ` fistpl (%esp)\n`; + ` fist{pop_suffix i}l (%esp)\n`; ` movl (%esp), {emit_reg i.res.(0)}\n` end; ` fldcw 4(%esp)\n`; @@ -558,9 +651,18 @@ let emit_instr fallthrough i = ` pushl {emit_addressing addr i.arg 0}\n`; stack_offset := !stack_offset + 8 | Lop(Ispecific(Ifloatarithmem(double, op, addr))) -> - if i.arg.(0) <> tos then + if not (is_tos i.arg.(0)) then ` fldl {emit_reg i.arg.(0)}\n`; ` {emit_string(instr_for_floatarithmem double op)} {emit_addressing addr i.arg 1}\n` + | Lop(Ispecific(Ifloatspecial s)) -> + (* Push args on float stack if necessary *) + for k = 0 to Array.length i.arg - 1 do + if not (is_tos i.arg.(k)) then ` fldl {emit_reg i.arg.(k)}\n` + done; + (* Fix-up for binary instrs whose args were swapped *) + if Array.length i.arg = 2 && is_tos i.arg.(1) then + ` fxch %st(1)\n`; + emit_floatspecial s | Lreloadretaddr -> () | Lreturn -> @@ -591,70 +693,8 @@ let emit_instr fallthrough i = ` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`; let b = name_for_cond_branch cmp in ` j{emit_string b} {emit_label lbl}\n` - | Ifloattest((Ceq | Cne as cmp), neg) -> - if i.arg.(1) <> tos then - ` fldl {emit_reg i.arg.(1)}\n`; - if i.arg.(0) <> tos then - ` fldl {emit_reg i.arg.(0)}\n`; - ` fucompp\n`; - ` fnstsw %ax\n`; - let neg1 = if cmp = Ceq then neg else not neg in - if neg1 then begin (* branch if different *) - ` andb $68, %ah\n`; - ` xorb $64, %ah\n`; - ` jne {emit_label lbl}\n` - end else begin (* branch if equal *) - ` andb $69, %ah\n`; - ` cmpb $64, %ah\n`; - ` je {emit_label lbl}\n` - end | Ifloattest(cmp, neg) -> - let actual_cmp = - if i.arg.(0) = tos && i.arg.(1) = tos then begin - (* both args on top of FP stack *) - ` fcompp\n`; - cmp - end else if i.arg.(0) = tos then begin - (* first arg on top of FP stack *) - ` fcompl {emit_reg i.arg.(1)}\n`; - cmp - end else if i.arg.(1) = tos then begin - (* second arg on top of FP stack *) - ` fcompl {emit_reg i.arg.(0)}\n`; - Cmm.swap_comparison cmp - end else begin - ` fldl {emit_reg i.arg.(0)}\n`; - ` fcompl {emit_reg i.arg.(1)}\n`; - cmp - end in - ` fnstsw %ax\n`; - begin match actual_cmp with - Cle -> - ` andb $69, %ah\n`; - ` decb %ah\n`; - ` cmpb $64, %ah\n`; - if neg - then ` jae ` - else ` jb ` - | Cge -> - ` andb $5, %ah\n`; - if neg - then ` jne ` - else ` je ` - | Clt -> - ` andb $69, %ah\n`; - ` cmpb $1, %ah\n`; - if neg - then ` jne ` - else ` je ` - | Cgt -> - ` andb $69, %ah\n`; - if neg - then ` jne ` - else ` je ` - | _ -> fatal_error "Emit_i386: floattest" - end; - `{emit_label lbl}\n` + emit_float_test cmp neg i.arg lbl | Ioddtest -> ` testl $1, {emit_reg i.arg.(0)}\n`; ` jne {emit_label lbl}\n` diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp index b67b68336..57dd69a8e 100644 --- a/asmcomp/i386/emit_nt.mlp +++ b/asmcomp/i386/emit_nt.mlp @@ -246,6 +246,99 @@ let output_epilogue () = let n = frame_size() - 4 in if n > 0 then ` add esp, {emit_int n}\n` +(* Determine if the given register is the top of the floating-point stack *) + +let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false + +(* Emit a "p" suffix if TOS is dead after the given instruction *) + +let pop_suffix i = + let r = i.arg.(0) in + if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p" + +(* Emit the code for a floating-point comparison *) + +let emit_float_test cmp neg arg lbl = + let actual_cmp = + match (is_tos arg.(0), is_tos arg.(1)) with + (true, true) -> + (* both args on top of FP stack *) + ` fcompp\n`; + cmp + | (true, false) -> + (* first arg on top of FP stack *) + ` fcompl {emit_reg arg.(1)}\n`; + cmp + | (false, true) -> + (* second arg on top of FP stack *) + ` fcompl {emit_reg arg.(0)}\n`; + Cmm.swap_comparison cmp + | (false, false) -> + ` fldl {emit_reg arg.(0)}\n`; + ` fcompl {emit_reg arg.(1)}\n`; + cmp + in + ` fnstsw %ax\n`; + begin match actual_cmp with + Ceq -> + if neg then begin + ` and ah, 68\n`; + ` xor ah, 64\n`; + ` jne ` + end else begin + ` and ah, 69\n`; + ` cmp ah, 64\n`; + ` je ` + end + | Cne -> + if neg then begin + ` and ah, 69\n`; + ` cmp ah, 64\n`; + ` je ` + end else begin + ` and ah, 68\n`; + ` xor ah, 64\n`; + ` jne ` + end + | Cle -> + ` and ah, 69\n`; + ` dec ah\n`; + ` cmp ah, 64\n`; + if neg + then ` jae ` + else ` jb ` + | Cge -> + ` and ah, 5\n`; + if neg + then ` jne ` + else ` je ` + | Clt -> + ` and ah, 69\n`; + ` cmp ah, 1\n`; + if neg + then ` jne ` + else ` je ` + | Cgt -> + ` and ah, 69\n`; + if neg + then ` jne ` + else ` je ` + end; + `{emit_label lbl}\n` + +(* Emit a Ifloatspecial instruction *) + +let emit_floatspecial = function + "atan" -> ` fldl1\n\tfpatan\n` + | "atan2" -> ` fpatan\n` + | "cos" -> ` fcos\n` + | "log" -> ` fldln2\n\tfxch\n\tfyl2x\n` + | "log10" -> ` fldlg2\n\tfxch\n\tfyl2x\n` + | "sin" -> ` fsin\n` + | "sqrt" -> ` fsqrt\n` + | "tan" -> ` fptan\n\tfstp st(0)\n` + | _ -> assert false + (* Output the assembly code for an instruction *) (* Name of current function *) @@ -257,8 +350,6 @@ let range_check_trap = ref 0 let float_constants = ref ([] : (int * string) list) -let tos = phys_reg 100 - let emit_instr i = match i.desc with Lend -> () @@ -266,8 +357,10 @@ let emit_instr i = let src = i.arg.(0) and dst = i.res.(0) in if src.loc <> dst.loc then begin if src.typ = Float then - if src = tos then - ` fstp {emit_reg dst}\n` + if is_tos src then + ` fst{pop_suffix i} {emit_reg dst}\n` + else if is_tos dst then + ` fld {emit_reg dst}\n` else begin ` fld {emit_reg src}\n`; ` fstp {emit_reg dst}\n` @@ -331,6 +424,8 @@ let emit_instr i = | Lop(Iload(chunk, addr)) -> let dest = i.res.(0) in begin match chunk with + | Word | Thirtytwo_signed | Thirtytwo_unsigned -> + ` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n` | Byte_unsigned -> ` movzx {emit_reg dest}, BYTE PTR {emit_addressing addr i.arg 0}\n` | Byte_signed -> @@ -343,8 +438,6 @@ let emit_instr i = ` fld REAL4 PTR {emit_addressing addr i.arg 0}\n` | Double | Double_u -> ` fld REAL8 PTR {emit_addressing addr i.arg 0}\n` - | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) -> - ` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n` end | Lop(Istore(chunk, addr)) -> begin match chunk with @@ -355,15 +448,15 @@ let emit_instr i = | Sixteen_unsigned | Sixteen_signed -> ` mov WORD PTR {emit_addressing addr i.arg 1}, {emit_reg16 i.arg.(0)}\n` | Single -> - if i.arg.(0) = tos then - ` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n` + if is_tos i.arg.(0) then + ` fst{pop_suffix i} REAL4 PTR {emit_addressing addr i.arg 1}\n` else begin ` fld {emit_reg i.arg.(0)}\n`; ` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n` end | Double | Double_u -> - if i.arg.(0) = tos then - ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n` + if is_tops i.arg.(0) then + ` fst{pop_suffix i} REAL8 PTR {emit_addressing addr i.arg 1}\n` else begin ` fld {emit_reg i.arg.(0)}\n`; ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n` @@ -421,12 +514,12 @@ let emit_instr i = | Lop(Iintop op) -> (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc -> + ` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n` | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) -> ` inc {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> ` dec {emit_reg i.res.(0)}\n` - | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc -> - ` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n` | Lop(Iintop_imm(Idiv, n)) -> let l = Misc.log2 n in let lbl = new_label() in @@ -447,21 +540,22 @@ let emit_instr i = (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n` | Lop(Inegf | Iabsf as floatop) -> - if i.arg.(0) <> tos then + if not (is_tos i.arg.(0)) then ` fld {emit_reg i.arg.(0)}\n`; ` {emit_string(instr_for_floatop floatop)}\n` | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev) as floatop) -> - if i.arg.(0) = tos && i.arg.(1) = tos then + begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with + (true, true) -> (* both operands on top of FP stack *) - ` {emit_string(instr_for_floatop_reversed floatop)}\n` - else if i.arg.(0) = tos then + ` {emit_string(instr_for_floatop_pop floatop)}\n` + | (true, false) -> (* first operand on stack *) ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` - else if i.arg.(1) = tos then + | (false, true) -> (* second operand on stack *) ` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n` - else begin + | (false, false) -> (* both operands in memory *) ` fld {emit_reg i.arg.(0)}\n`; ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` @@ -476,7 +570,7 @@ let emit_instr i = ` add esp, 4\n` end | Lop(Iintoffloat) -> - if i.arg.(0) <> tos then + if not (is_tos i.arg.(0)) then ` fld {emit_reg i.arg.(0)}\n`; stack_offset := !stack_offset - 8; ` sub esp, 8\n`; @@ -487,9 +581,9 @@ let emit_instr i = ` fldcw [esp]\n`; begin match i.res.(0).loc with Stack s -> - ` fistp {emit_reg i.res.(0)}\n` + ` fist{pop_suffix i} {emit_reg i.res.(0)}\n` | _ -> - ` fistp DWORD PTR [esp]\n`; + ` fist{pop_suffix i} DWORD PTR [esp]\n`; ` mov {emit_reg i.res.(0)}, [esp]\n` end; ` fldcw [esp+4]\n`; @@ -537,10 +631,19 @@ let emit_instr i = ` push DWORD PTR {emit_addressing addr i.arg 0}\n`; stack_offset := !stack_offset + 8 | Lop(Ispecific(Ifloatarithmem(double, op, addr))) -> - if i.arg.(0) <> tos then + if not (is_tos i.arg.(0)) then ` fld {emit_reg i.arg.(0)}\n`; let size = if double then "REAL8" else "REAL4" in ` {emit_string(instr_for_floatarithmem op)} {emit_string size} PTR {emit_addressing addr i.arg 1}\n` + | Lop(Ispecific(Ifloatspecial s)) -> + (* Push args on float stack if necessary *) + for k = 0 to Array.length i.arg - 1 do + if not (is_tos i.arg.(k)) then ` fld {emit_reg i.arg.(k)}\n` + done; + (* Fix-up for binary instrs whose args were swapped *) + if Array.length i.arg = 2 && is_tos i.arg.(1) then + ` fxch st(1)\n`; + emit_floatspecial s | Lreloadretaddr -> () | Lreturn -> @@ -571,70 +674,8 @@ let emit_instr i = ` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`; let b = name_for_cond_branch cmp in ` j{emit_string b} {emit_label lbl}\n` - | Ifloattest((Ceq | Cne as cmp), neg) -> - if i.arg.(1) <> tos then - ` fld {emit_reg i.arg.(1)}\n`; - if i.arg.(0) <> tos then - ` fld {emit_reg i.arg.(0)}\n`; - ` fucompp\n`; - ` fnstsw ax\n`; - let neg1 = if cmp = Ceq then neg else not neg in - if neg1 then begin (* branch if different *) - ` and ah, 68\n`; - ` xor ah, 64\n`; - ` jne {emit_label lbl}\n` - end else begin (* branch if equal *) - ` and ah, 69\n`; - ` cmp ah, 64\n`; - ` je {emit_label lbl}\n` - end | Ifloattest(cmp, neg) -> - let actual_cmp = - if i.arg.(0) = tos && i.arg.(1) = tos then begin - (* both args on top of FP stack *) - ` fcompp\n`; - cmp - end else if i.arg.(0) = tos then begin - (* first arg on top of FP stack *) - ` fcomp {emit_reg i.arg.(1)}\n`; - cmp - end else if i.arg.(1) = tos then begin - (* second arg on top of FP stack *) - ` fcomp {emit_reg i.arg.(0)}\n`; - Cmm.swap_comparison cmp - end else begin - ` fld {emit_reg i.arg.(0)}\n`; - ` fcomp {emit_reg i.arg.(1)}\n`; - cmp - end in - ` fnstsw ax\n`; - begin match actual_cmp with - Cle -> - ` and ah, 69\n`; - ` dec ah\n`; - ` cmp ah, 64\n`; - if neg - then ` jae ` - else ` jb ` - | Cge -> - ` and ah, 5\n`; - if neg - then ` jne ` - else ` je ` - | Clt -> - ` and ah, 69\n`; - ` cmp ah, 1\n`; - if neg - then ` jne ` - else ` je ` - | Cgt -> - ` and ah, 69\n`; - if neg - then ` jne ` - else ` je ` - | _ -> fatal_error "Emit_i386: floattest" - end; - `{emit_label lbl}\n` + emit_float_test cmp neg i.arg lbl | Ioddtest -> ` test {emit_reg i.arg.(0)}, 1\n`; ` jne {emit_label lbl}\n` diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml index b36c5b23c..d03b121ad 100644 --- a/asmcomp/i386/proc.ml +++ b/asmcomp/i386/proc.ml @@ -47,7 +47,7 @@ let register_class r = | Addr -> 0 | Float -> 1 -let num_available_registers = [| 7; 0 |] +let num_available_registers = [| 7; 1 |] let first_available_register = [| 0; 100 |] diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml index d6a5ef115..f2f79ca5e 100644 --- a/asmcomp/i386/selection.ml +++ b/asmcomp/i386/selection.ml @@ -73,14 +73,28 @@ let rec select_addr exp = | arg -> (Alinear arg, 0) +(* C functions to be turned into Ifloatspecial instructions if -ffast-math *) + +let inline_float_ops = + ["atan"; "atan2"; "cos"; "log"; "log10"; "sin"; "sqrt"; "tan"] + (* Estimate number of float temporaries needed to evaluate expression (Ershov's algorithm) *) let rec float_needs = function - Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) -> + Cop((Cnegf | Cabsf), [arg]) -> + float_needs arg + | Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) -> let n1 = float_needs arg1 in let n2 = float_needs arg2 in if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2 + | Cop(Cextcall(fn, ty_res, alloc), args) + when !fast_math && List.mem fn inline_float_ops -> + begin match args with + [arg] -> float_needs arg + | [arg1; arg2] -> max (float_needs arg2 + 1) (float_needs arg1) + | _ -> assert false + end | _ -> 1 @@ -119,7 +133,7 @@ let pseudoregs_for_operation op arg res = the result is always left at the top of the floating-point stack *) | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ifloatofint | Iload((Single | Double | Double_u), _) - | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _)) -> + | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _) | Ifloatspecial _) -> (arg, [| tos |], false) (* don't move it immediately *) (* For storing a byte, the argument must be in eax...edx. (But for a short, any reg will do!) @@ -215,6 +229,11 @@ method select_operation op args = | _ -> super#select_operation op args end + (* Recognize inlined floating point operations *) + | Cextcall(fn, ty_res, false) + when !fast_math && List.mem fn inline_float_ops -> + (Ispecific(Ifloatspecial fn), args) + (* Default *) | _ -> super#select_operation op args (* Recognize float arithmetic with mem *) diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml index 2138d7da4..4ea85fa51 100644 --- a/asmcomp/selectgen.ml +++ b/asmcomp/selectgen.ml @@ -76,6 +76,13 @@ let size_expr env exp = fatal_error "Selection.size_expr" in size Tbl.empty exp +(* These are C library functions that are known to be pure + (no side effects at all) and worth not pre-computing. *) + +let pure_external_functions = + ["acos"; "asin"; "atan"; "atan2"; "cos"; "exp"; "log"; + "log10"; "sin"; "sqrt"; "tan"] + (* Says if an expression is "simple". A "simple" expression has no side-effects and its execution can be delayed until its value is really needed. In the case of e.g. an [alloc] instruction, @@ -97,9 +104,15 @@ let rec is_simple_expr = function | Cop(op, args) -> begin match op with (* The following may have side effects *) - Capply _ | Cextcall(_, _, _) | Calloc | Cstore _ | Craise -> false + | Capply _ | Calloc | Cstore _ | Craise -> false + (* External C functions normally have side effects, unless known *) + | Cextcall(fn, _, alloc) -> + not alloc && + List.mem fn pure_external_functions && + List.for_all is_simple_expr args (* The remaining operations are simple if their args are *) - | _ -> List.for_all is_simple_expr args + | _ -> + List.for_all is_simple_expr args end | _ -> false |