diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 1996-05-16 19:39:26 +0000 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 1996-05-16 19:39:26 +0000 |
commit | 2a309d688f7d6e673563ab11c06fe4e294cc4bfa (patch) | |
tree | 42de46da0f07425f1f041cad8755eb2401324767 | |
parent | 064465320e18f583d63fe6c008ab9b8c4c22b0ae (diff) |
Ne plus utiliser de registres flottants
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@825 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | asmcomp/coloring.ml | 129 | ||||
-rw-r--r-- | asmcomp/emit_i386.mlp | 254 | ||||
-rw-r--r-- | asmcomp/emit_i386nt.mlp | 222 | ||||
-rw-r--r-- | asmcomp/proc_i386.ml | 58 | ||||
-rw-r--r-- | asmcomp/proc_i386nt.ml | 53 | ||||
-rw-r--r-- | asmcomp/reload.ml | 5 | ||||
-rw-r--r-- | asmrun/i386.S | 14 | ||||
-rw-r--r-- | asmrun/i386nt.asm | 126 |
8 files changed, 347 insertions, 514 deletions
diff --git a/asmcomp/coloring.ml b/asmcomp/coloring.ml index 74cfbb632..5ba3dc50c 100644 --- a/asmcomp/coloring.ml +++ b/asmcomp/coloring.ml @@ -46,15 +46,22 @@ let constrained = ref Reg.Set.empty let find_degree reg = if reg.spill then () else begin - let deg = ref 0 in let cl = Proc.register_class reg in - List.iter - (fun r -> if not r.spill & Proc.register_class r = cl then incr deg) - reg.interf; - reg.degree <- !deg; - if !deg >= Proc.num_available_registers.(cl) - then constrained := Reg.Set.add reg !constrained - else unconstrained := Reg.Set.add reg !unconstrained + let avail_regs = Proc.num_available_registers.(cl) in + if avail_regs = 0 then + (* Don't bother computing the degree if there are no regs + in this class *) + unconstrained := Reg.Set.add reg !unconstrained + else begin + let deg = ref 0 in + List.iter + (fun r -> if not r.spill & Proc.register_class r = cl then incr deg) + reg.interf; + reg.degree <- !deg; + if !deg >= avail_regs + then constrained := Reg.Set.add reg !constrained + else unconstrained := Reg.Set.add reg !unconstrained + end end (* Remove a register from the interference graph *) @@ -137,60 +144,62 @@ let assign_location reg = let num_regs = Proc.num_available_registers.(cl) in let last_reg = first_reg + num_regs in let score = Array.create num_regs 0 in - (* Favor the registers that have been assigned to pseudoregs for which - we have a preference. If these pseudoregs have not been assigned - already, avoid the registers with which they conflict. *) - iter_preferred - (fun r w -> - match r.loc with - Reg n -> if n >= first_reg & n < last_reg then - score.(n - first_reg) <- score.(n - first_reg) + w - | Unknown -> - List.iter - (fun neighbour -> - match neighbour.loc with - Reg n -> if n >= first_reg & n < last_reg then - score.(n - first_reg) <- score.(n - first_reg) - w - | _ -> ()) - r.interf - | _ -> ()) - reg; - List.iter - (fun neighbour -> - (* Prohibit the registers that have been assigned - to our neighbours *) - begin match neighbour.loc with - Reg n -> if n >= first_reg & n < last_reg then - score.(n - first_reg) <- (-1000000) - | _ -> () - end; - (* Avoid the registers that have been assigned to pseudoregs - for which our neighbours have a preference *) - iter_preferred - (fun r w -> - match r.loc with - Reg n -> if n >= first_reg & n < last_reg then - score.(n - first_reg) <- score.(n - first_reg) - (w - 1) - (* w-1 to break the symmetry when two conflicting regs - have the same preference for a third reg. *) - | _ -> ()) - neighbour) - reg.interf; - (* Pick the register with the best score *) let best_score = ref (-1000000) and best_reg = ref (-1) in let start = start_register.(cl) in - for n = start to num_regs - 1 do - if score.(n) > !best_score then begin - best_score := score.(n); - best_reg := n - end - done; - for n = 0 to start - 1 do - if score.(n) > !best_score then begin - best_score := score.(n); - best_reg := n - end - done; + if num_regs > 0 then begin + (* Favor the registers that have been assigned to pseudoregs for which + we have a preference. If these pseudoregs have not been assigned + already, avoid the registers with which they conflict. *) + iter_preferred + (fun r w -> + match r.loc with + Reg n -> if n >= first_reg & n < last_reg then + score.(n - first_reg) <- score.(n - first_reg) + w + | Unknown -> + List.iter + (fun neighbour -> + match neighbour.loc with + Reg n -> if n >= first_reg & n < last_reg then + score.(n - first_reg) <- score.(n - first_reg) - w + | _ -> ()) + r.interf + | _ -> ()) + reg; + List.iter + (fun neighbour -> + (* Prohibit the registers that have been assigned + to our neighbours *) + begin match neighbour.loc with + Reg n -> if n >= first_reg & n < last_reg then + score.(n - first_reg) <- (-1000000) + | _ -> () + end; + (* Avoid the registers that have been assigned to pseudoregs + for which our neighbours have a preference *) + iter_preferred + (fun r w -> + match r.loc with + Reg n -> if n >= first_reg & n < last_reg then + score.(n - first_reg) <- score.(n - first_reg) - (w - 1) + (* w-1 to break the symmetry when two conflicting regs + have the same preference for a third reg. *) + | _ -> ()) + neighbour) + reg.interf; + (* Pick the register with the best score *) + for n = start to num_regs - 1 do + if score.(n) > !best_score then begin + best_score := score.(n); + best_reg := n + end + done; + for n = 0 to start - 1 do + if score.(n) > !best_score then begin + best_score := score.(n); + best_reg := n + end + done + end; (* Found a register? *) if !best_reg >= 0 then begin reg.loc <- Reg(first_reg + !best_reg); diff --git a/asmcomp/emit_i386.mlp b/asmcomp/emit_i386.mlp index 5ff2ca0d3..a6e78d5f5 100644 --- a/asmcomp/emit_i386.mlp +++ b/asmcomp/emit_i386.mlp @@ -71,23 +71,10 @@ let emit_align = "linux_elf" -> (fun n -> ` .align {emit_int n}\n`) | _ -> (fun n -> ` .align {emit_int(Misc.log2 n)}\n`) -(* Track the position of the floating-point stack *) - -let fp_offset = ref 0 - -let push_fp () = - incr fp_offset; - if !fp_offset > 4 then fatal_error "Emit: float expression too complex" - -let pop_fp () = - decr fp_offset - (* Output a pseudo-register *) let emit_reg = function - { loc = Reg r; typ = Float } -> - emit_string (register_name(r + !fp_offset)) - | { loc = Reg r } -> + { loc = Reg r } -> emit_string (register_name r) | { loc = Stack s } as r -> let ofs = slot_offset s (register_class r) in @@ -122,17 +109,6 @@ let register_overlap reg arr = with Exit -> true -(* Check if a set of registers contains a float *) - -let contains_floats arr = - try - for i = 0 to Array.length arr - 1 do - if arr.(i).typ = Float then raise Exit - done; - false - with Exit -> - true - (* Output an addressing mode *) let emit_addressing addr r n = @@ -153,13 +129,6 @@ let emit_addressing addr r n = if d <> 0 then emit_int d; `({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})` -(* Emit the operand of a floating-point operation *) - -let emit_float_operand r = - match r.loc with - Stack s -> `l {emit_reg r}` - | _ -> ` {emit_reg r}` - (* Record live pointers at call points *) type frame_descr = @@ -229,21 +198,21 @@ let instr_for_intop = function let instr_for_floatop = function Inegf -> "fchs" | Iabsf -> "fabs" - | Iaddf -> "fadd" - | Isubf -> "fsub" - | Imulf -> "fmul" - | Idivf -> "fdiv" - | Ispecific Isubfrev -> "fsubr" - | Ispecific Idivfrev -> "fdivr" + | Iaddf -> "faddl" + | Isubf -> "fsubl" + | Imulf -> "fmull" + | Idivf -> "fdivl" + | Ispecific Isubfrev -> "fsubrl" + | Ispecific Idivfrev -> "fdivrl" | _ -> fatal_error "Emit_i386: instr_for_floatop" let instr_for_floatop_reversed = function - Iaddf -> "fadd" - | Isubf -> "fsubr" - | Imulf -> "fmul" - | Idivf -> "fdivr" - | Ispecific Isubfrev -> "fsub" - | Ispecific Idivfrev -> "fdiv" + Iaddf -> "faddl" + | Isubf -> "fsubrl" + | Imulf -> "fmull" + | Idivf -> "fdivrl" + | Ispecific Isubfrev -> "fsubl" + | Ispecific Idivfrev -> "fdivl" | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed" let instr_for_floatop_pop = function @@ -295,27 +264,23 @@ let range_check_trap = ref 0 let float_constants = ref ([] : (int * string) list) +let tos = phys_reg 100 + let emit_instr i = match i.desc with Lend -> () | Lop(Imove | Ispill | Ireload) -> - if i.arg.(0).loc <> i.res.(0).loc then begin - match i.arg.(0).typ with - Int | Addr -> - ` movl {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` - | Float -> - begin match i.arg.(0).loc with - Reg 100 -> (* top of FP stack *) - ` fstpl {emit_reg i.res.(0)}\n`; - pop_fp() - | Reg 101 when !fp_offset = 0 -> (* %st(0) *) - ` fstl {emit_reg i.res.(0)}\n` - | _ -> - ` fldl {emit_reg i.arg.(0)}\n`; - push_fp(); - ` fstpl {emit_reg i.res.(0)}\n`; - pop_fp() - end + let src = i.arg.(0) and dst = i.res.(0) in + if src.loc <> dst.loc then begin + if src.typ = Float then + if src = tos then + ` fstpl {emit_reg dst}\n` + else begin + ` fldl {emit_reg src}\n`; + ` fstpl {emit_reg dst}\n` + end + else + ` movl {emit_reg src}, {emit_reg dst}\n` end | Lop(Iconst_int 0) -> begin match i.res.(0).loc with @@ -334,8 +299,7 @@ let emit_instr i = let lbl = new_label() in float_constants := (lbl, s) :: !float_constants; ` fldl {emit_label lbl}\n` - end; - push_fp() + end | Lop(Iconst_symbol s) -> ` movl ${emit_symbol s}, {emit_reg i.res.(0)}\n` | Lop(Icall_ind) -> @@ -360,12 +324,6 @@ let emit_instr i = ` call {emit_symbol "caml_c_call"}\n`; record_frame i.live end else begin - if contains_floats i.arg or contains_floats i.res then begin - ` ffree %st(0)\n`; - ` ffree %st(1)\n`; - ` ffree %st(2)\n`; - ` ffree %st(3)\n` - end; ` call {emit_symbol s}\n` end | Lop(Istackoffset n) -> @@ -396,23 +354,18 @@ let emit_instr i = ` movswl {emit_addressing addr i.arg 0}, {emit_reg dest}\n` end | Float -> - ` fldl {emit_addressing addr i.arg 0}\n`; - push_fp() + ` fldl {emit_addressing addr i.arg 0}\n` end | Lop(Istore(Word, addr)) -> begin match i.arg.(0).typ with Int | Addr -> ` movl {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n` | Float -> - begin match i.arg.(0).loc with - Reg 100 -> (* top of FP stack *) - ` fstpl {emit_addressing addr i.arg 1}\n`; - pop_fp() - | Reg 101 when !fp_offset = 0 -> (* %st(0) *) - ` fstl {emit_addressing addr i.arg 1}\n` - | _ -> - ` fldl {emit_reg i.arg.(0)}\n`; - ` fstpl {emit_addressing addr i.arg 1}\n` + if i.arg.(0) = tos then + ` fstpl {emit_addressing addr i.arg 1}\n` + else begin + ` fldl {emit_reg i.arg.(0)}\n`; + ` fstpl {emit_addressing addr i.arg 1}\n` end end | Lop(Istore(chunk, addr)) -> @@ -500,25 +453,24 @@ let emit_instr i = (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n` | Lop(Inegf | Iabsf as floatop) -> - if i.arg.(0).loc <> Reg 100 then begin + if i.arg.(0) <> tos then ` fldl {emit_reg i.arg.(0)}\n`; - push_fp() - end; ` {emit_string(instr_for_floatop floatop)}\n` | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev) as floatop) -> - begin match (i.arg.(0).loc, i.arg.(1).loc) with - (Reg 100, Reg 100) -> (* both operands on top of FP stack *) - ` {emit_string(instr_for_floatop_pop floatop)} %st(0), %st(1)\n`; - pop_fp() - | (Reg 100, _) -> (* first operand on stack *) - ` {emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n` - | (_, Reg 100) -> (* second operand on stack *) - ` {emit_string(instr_for_floatop_reversed floatop)}{emit_float_operand i.arg.(0)}\n` - | (_, _) -> (* both in regs or on stack *) - ` fldl {emit_reg i.arg.(0)}\n`; - push_fp(); - ` {emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n` + if i.arg.(0) = tos && i.arg.(1) = tos then + (* both operands on top of FP stack *) + ` {emit_string(instr_for_floatop_pop floatop)} %st(0), %st(1)\n` + else if i.arg.(0) = tos then + (* first operand on stack *) + ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` + else if i.arg.(1) = tos then + (* second operand on stack *) + ` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n` + else begin + (* both operands in memory *) + ` fldl {emit_reg i.arg.(0)}\n`; + ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` end | Lop(Ifloatofint) -> begin match i.arg.(0).loc with @@ -528,13 +480,10 @@ let emit_instr i = ` pushl {emit_reg i.arg.(0)}\n`; ` fildl (%esp)\n`; ` addl $4, %esp\n` - end; - push_fp() + end | Lop(Iintoffloat) -> - if i.arg.(0).loc <> Reg 100 then begin + if i.arg.(0) <> tos then ` fldl {emit_reg i.arg.(0)}\n`; - push_fp() - end; stack_offset := !stack_offset - 8; ` subl $8, %esp\n`; ` fnstcw 4(%esp)\n`; @@ -549,7 +498,6 @@ let emit_instr i = ` fistpl (%esp)\n`; ` movl (%esp), {emit_reg i.res.(0)}\n` end; - pop_fp(); ` fldcw 4(%esp)\n`; ` addl $8, %esp\n`; stack_offset := !stack_offset + 8 @@ -566,22 +514,14 @@ let emit_instr i = for n = Array.length i.arg - 1 downto 0 do let r = i.arg.(n) in match r with - {loc = Reg rn; typ = Float} -> + {loc = Reg _; typ = Float} -> ` subl $8, %esp\n`; - stack_offset := !stack_offset + 8; - begin match rn with - 100 -> - ` fstpl 0(%esp)\n`; - pop_fp() - | 101 when !fp_offset = 0 -> - ` fstl 0(%esp)\n` - | _ -> - ` fldl {emit_reg r}\n`; - ` fstpl 0(%esp)\n` - end + ` fstpl 0(%esp)\n`; + stack_offset := !stack_offset + 8 | {loc = Stack sl; typ = Float} -> - ` pushl 4+{emit_reg r}\n`; - ` pushl 4+{emit_reg r}\n`; + let ofs = slot_offset sl 1 in + ` pushl {emit_int(ofs + 4)}(%esp)\n`; + ` pushl {emit_int(ofs + 4)}(%esp)\n`; stack_offset := !stack_offset + 8 | _ -> ` pushl {emit_reg r}\n`; @@ -601,10 +541,8 @@ let emit_instr i = ` pushl {emit_addressing addr i.arg 0}\n`; stack_offset := !stack_offset + 4 | Lop(Ispecific(Ifloatarithmem(op, addr))) -> - if i.arg.(0).loc <> Reg 100 then begin + if i.arg.(0) <> tos then ` fldl {emit_reg i.arg.(0)}\n`; - push_fp() - end; ` {emit_string(instr_for_floatarithmem op)} {emit_addressing addr i.arg 1}\n` | Lreloadretaddr -> () @@ -636,54 +574,45 @@ let emit_instr i = ` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`; let b = name_for_cond_branch cmp in ` j{emit_string b} {emit_label lbl}\n` + | Ifloattest((Ceq | Cne as cmp), neg) -> + if i.arg.(1) <> tos then + ` fldl {emit_reg i.arg.(1)}\n`; + if i.arg.(0) <> tos then + ` fldl {emit_reg i.arg.(0)}\n`; + ` fucompp\n`; + ` fnstsw %ax\n`; + let neg1 = if cmp = Ceq then neg else not neg in + if neg1 then begin (* branch if different *) + ` andb $68, %ah\n`; + ` xorb $64, %ah\n`; + ` jne {emit_label lbl}\n` + end else begin (* branch if equal *) + ` andb $69, %ah\n`; + ` cmpb $64, %ah\n`; + ` je {emit_label lbl}\n` + end | Ifloattest(cmp, neg) -> - let instr = - match cmp with - Ceq | Cne -> "fucom" - | _ -> "fcom" in let actual_cmp = - match (i.arg.(0).loc, i.arg.(1).loc) with - (Reg 100, Reg 100) -> (* both args on top of FP stack *) - ` {emit_string instr}pp\n`; - fp_offset := !fp_offset - 2; - cmp - | (Reg 100, _) -> (* first arg on top of FP stack *) - ` {emit_string instr}p{emit_float_operand i.arg.(1)}\n`; - pop_fp(); - cmp - | (_, Reg 100) -> (* second arg on top of FP stack *) - ` {emit_string instr}p{emit_float_operand i.arg.(0)}\n`; - pop_fp(); - Cmm.swap_comparison cmp - | (_, _) -> - ` fldl {emit_reg i.arg.(0)}\n`; - push_fp(); - ` {emit_string instr}p{emit_float_operand i.arg.(1)}\n`; - pop_fp(); - cmp in + if i.arg.(0) = tos && i.arg.(1) = tos then begin + (* both args on top of FP stack *) + ` fcompp\n`; + cmp + end else if i.arg.(0) = tos then begin + (* first arg on top of FP stack *) + ` fcompl {emit_reg i.arg.(1)}\n`; + cmp + end else if i.arg.(1) = tos then begin + (* second arg on top of FP stack *) + ` fcompl {emit_reg i.arg.(0)}\n`; + Cmm.swap_comparison cmp + end else begin + ` fldl {emit_reg i.arg.(0)}\n`; + ` fcompl {emit_reg i.arg.(1)}\n`; + cmp + end in ` fnstsw %ax\n`; begin match actual_cmp with - Ceq -> - if neg then begin - ` andb $68, %ah\n`; - ` xorb $64, %ah\n`; - ` jne ` - end else begin - ` andb $69, %ah\n`; - ` cmpb $64, %ah\n`; - ` je ` - end - | Cne -> - if neg then begin - ` andb $69, %ah\n`; - ` cmpb $64, %ah\n`; - ` je ` - end else begin - ` andb $68, %ah\n`; - ` xorb $64, %ah\n`; - ` jne ` - end - | Cle -> + Cle -> ` andb $69, %ah\n`; ` decb %ah\n`; ` cmpb $64, %ah\n`; @@ -706,6 +635,7 @@ let emit_instr i = if neg then ` jne ` else ` je ` + | _ -> fatal_error "Emit_i386: floattest" end; `{emit_label lbl}\n` | Ioddtest -> diff --git a/asmcomp/emit_i386nt.mlp b/asmcomp/emit_i386nt.mlp index 0592fc9a1..e709f4bf6 100644 --- a/asmcomp/emit_i386nt.mlp +++ b/asmcomp/emit_i386nt.mlp @@ -45,7 +45,8 @@ let slot_offset loc cl = else !stack_offset + num_stack_slots.(0) * 4 + n * 8 | Outgoing n -> n -(* Record symbols used and defined - at the end generate extern for those used but not defined *) +(* Record symbols used and defined - at the end generate extern for those + used but not defined *) let symbols_defined = ref StringSet.empty let symbols_used = ref StringSet.empty @@ -68,23 +69,10 @@ let emit_label lbl = let emit_align n = ` ALIGN {emit_int n}\n` -(* Track the position of the floating-point stack *) - -let fp_offset = ref 0 - -let push_fp () = - incr fp_offset; - if !fp_offset > 4 then fatal_error "Emit: float expression too complex" - -let pop_fp () = - decr fp_offset - (* Output a pseudo-register *) let emit_reg = function - { loc = Reg r; typ = Float } -> - emit_string (register_name(r + !fp_offset)) - | { loc = Reg r } -> + { loc = Reg r } -> emit_string (register_name r) | { loc = Stack s; typ = Float } as r -> let ofs = slot_offset s (register_class r) in @@ -122,17 +110,6 @@ let register_overlap reg arr = with Exit -> true -(* Check if a set of registers contains a float *) - -let contains_floats arr = - try - for i = 0 to Array.length arr - 1 do - if arr.(i).typ = Float then raise Exit - done; - false - with Exit -> - true - (* Output an addressing mode *) let emit_signed_int d = @@ -153,13 +130,6 @@ let emit_addressing addr r n = | Iindexed2scaled(scale, d) -> `[{emit_reg r.(n)}+{emit_reg r.(n+1)}*{emit_int scale}{emit_signed_int d}]` -(* Emit the operand of a floating-point operation *) - -let emit_float_operand r = - match r.loc with - Reg _ -> `st, {emit_reg r}` - | _ -> `{emit_reg r}` - (* Record live pointers at call points *) type frame_descr = @@ -286,27 +256,23 @@ let range_check_trap = ref 0 let float_constants = ref ([] : (int * string) list) +let tos = phys_reg 100 + let emit_instr i = match i.desc with Lend -> () | Lop(Imove | Ispill | Ireload) -> - if i.arg.(0).loc <> i.res.(0).loc then begin - match i.arg.(0).typ with - Int | Addr -> - ` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` - | Float -> - begin match i.arg.(0).loc with - Reg 100 -> (* top of FP stack *) - ` fstp {emit_reg i.res.(0)}\n`; - pop_fp() - | Reg 101 when !fp_offset = 0 -> (* %st(0) *) - ` fst {emit_reg i.res.(0)}\n` - | _ -> - ` fld {emit_reg i.arg.(0)}\n`; - push_fp(); - ` fstp {emit_reg i.res.(0)}\n`; - pop_fp() - end + let src = i.arg.(0) and dst = i.res.(0) in + if src.loc <> dst.loc then begin + if src.typ = Float then + if src = tos then + ` fstp {emit_reg dst}\n` + else begin + ` fld {emit_reg src}\n`; + ` fstp {emit_reg dst}\n` + end + else + ` movl {emit_reg dst}, {emit_reg src}\n` end | Lop(Iconst_int 0) -> begin match i.res.(0).loc with @@ -325,8 +291,7 @@ let emit_instr i = let lbl = new_label() in float_constants := (lbl, s) :: !float_constants; ` fld {emit_label lbl}\n` - end; - push_fp() + end | Lop(Iconst_symbol s) -> add_used_symbol s; ` mov {emit_reg i.res.(0)}, OFFSET {emit_symbol s}\n` @@ -355,12 +320,6 @@ let emit_instr i = ` call _caml_c_call\n`; record_frame i.live end else begin - if contains_floats i.arg or contains_floats i.res then begin - ` ffree st(0)\n`; - ` ffree st(1)\n`; - ` ffree st(2)\n`; - ` ffree st(3)\n` - end; ` call {emit_symbol s}\n` end | Lop(Istackoffset n) -> @@ -391,21 +350,16 @@ let emit_instr i = ` movsx {emit_reg dest}, WORD PTR {emit_addressing addr i.arg 0}\n` end | Float -> - ` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`; - push_fp() + ` fld REAL8 PTR {emit_addressing addr i.arg 0}\n` end | Lop(Istore(Word, addr)) -> begin match i.arg.(0).typ with Int | Addr -> ` mov DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n` | Float -> - begin match i.arg.(0).loc with - Reg 100 -> (* top of FP stack *) - ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`; - pop_fp() - | Reg 101 when !fp_offset = 0 -> (* %st(0) *) - ` fst REAL8 PTR {emit_addressing addr i.arg 1}\n` - | _ -> + if i.arg.(0) = tos then + ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n` + else begin ` fld {emit_reg i.arg.(0)}\n`; ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n` end @@ -495,25 +449,24 @@ let emit_instr i = (* We have i.arg.(0) = i.res.(0) *) ` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n` | Lop(Inegf | Iabsf as floatop) -> - if i.arg.(0).loc <> Reg 100 then begin + if i.arg.(0) <> tos then begin ` fld {emit_reg i.arg.(0)}\n`; - push_fp() - end; ` {emit_string(instr_for_floatop floatop)}\n` | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev) as floatop) -> - begin match (i.arg.(0).loc, i.arg.(1).loc) with - (Reg 100, Reg 100) -> (* both operands on top of FP stack *) - ` {emit_string(instr_for_floatop_reversed floatop)}\n`; - pop_fp() - | (Reg 100, _) -> (* first operand on stack *) - ` {emit_string(instr_for_floatop floatop)} {emit_float_operand i.arg.(1)}\n` - | (_, Reg 100) -> (* second operand on stack *) - ` {emit_string(instr_for_floatop_reversed floatop)} {emit_float_operand i.arg.(0)}\n` - | (_, _) -> (* both in regs or on stack *) - ` fld {emit_reg i.arg.(0)}\n`; - push_fp(); - ` {emit_string(instr_for_floatop floatop)} {emit_float_operand i.arg.(1)}\n` + if i.arg.(0) = tos && i.arg.(1) = tos then + (* both operands on top of FP stack *) + ` {emit_string(instr_for_floatop_reversed floatop)}\n` + else if i.arg.(0) = tos then + (* first operand on stack *) + ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` + else if i.arg.(1) = tos then + (* second operand on stack *) + ` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n` + else begin + (* both operands in memory *) + ` fld {emit_reg i.arg.(0)}\n`; + ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n` end | Lop(Ifloatofint) -> begin match i.arg.(0).loc with @@ -523,13 +476,10 @@ let emit_instr i = ` push {emit_reg i.arg.(0)}\n`; ` fild DWORD PTR [esp]\n`; ` add esp, 4\n` - end; - push_fp() + end | Lop(Iintoffloat) -> - if i.arg.(0).loc <> Reg 100 then begin + if i.arg.(0) <> tos then ` fld {emit_reg i.arg.(0)}\n`; - push_fp() - end; stack_offset := !stack_offset - 8; ` sub esp, 8\n`; ` fnstcw [esp+4]\n`; @@ -544,7 +494,6 @@ let emit_instr i = ` fistp DWORD PTR [esp]\n`; ` mov {emit_reg i.res.(0)}, [esp]\n` end; - pop_fp(); ` fldcw [esp+4]\n`; ` add esp, 8\n`; stack_offset := !stack_offset + 8 @@ -564,17 +513,8 @@ let emit_instr i = match r with {loc = Reg rn; typ = Float} -> ` sub esp, 8\n`; - stack_offset := !stack_offset + 8; - begin match rn with - 100 -> - ` fstp REAL8 PTR 0[esp]\n`; - pop_fp() - | 101 when !fp_offset = 0 -> - ` fst REAL8 PTR 0[esp]\n` - | _ -> - ` fld {emit_reg r}\n`; - ` fstp REAL8 PTR 0[esp]\n` - end + ` fstp REAL8 PTR 0[esp]\n`; + stack_offset := !stack_offset + 8 | {loc = Stack sl; typ = Float} -> let ofs = slot_offset sl 1 in ` push DWORD PTR {emit_int (ofs + 4)}[esp]\n`; @@ -598,10 +538,8 @@ let emit_instr i = ` push DWORD PTR {emit_addressing addr i.arg 0}\n`; stack_offset := !stack_offset + 4 | Lop(Ispecific(Ifloatarithmem(op, addr))) -> - if i.arg.(0).loc <> Reg 100 then begin + if i.arg.(0) <> tos then ` fld {emit_reg i.arg.(0)}\n`; - push_fp() - end; ` {emit_string(instr_for_floatarithmem op)} REAL8 PTR {emit_addressing addr i.arg 1}\n` | Lreloadretaddr -> () @@ -633,54 +571,45 @@ let emit_instr i = ` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`; let b = name_for_cond_branch cmp in ` j{emit_string b} {emit_label lbl}\n` + | Ifloattest((Ceq | Cne as cmp), neg) -> + if i.arg.(1) <> tos then + ` fld {emit_reg i.arg.(1)}\n`; + if i.arg.(0) <> tos then + ` fld {emit_reg i.arg.(0)}\n`; + ` fucompp\n`; + ` fnstsw ax\n`; + let neg1 = if cmp = Ceq then neg else not neg in + if neg1 then begin (* branch if different *) + ` and ah, 68\n`; + ` xor ah, 64\n`; + ` jne {emit_label lbl}\n` + end else begin (* branch if equal *) + ` and ah, 69\n`; + ` cmp ah, 64\n`; + ` je {emit_label lbl}\n` + end | Ifloattest(cmp, neg) -> - let instr = - match cmp with - Ceq | Cne -> "fucom" - | _ -> "fcom" in let actual_cmp = - match (i.arg.(0).loc, i.arg.(1).loc) with - (Reg 100, Reg 100) -> (* both args on top of FP stack *) - ` {emit_string instr}pp\n`; - fp_offset := !fp_offset - 2; - cmp - | (Reg 100, _) -> (* first arg on top of FP stack *) - ` {emit_string instr}p {emit_reg i.arg.(1)}\n`; - pop_fp(); - cmp - | (_, Reg 100) -> (* second arg on top of FP stack *) - ` {emit_string instr}p {emit_reg i.arg.(0)}\n`; - pop_fp(); - Cmm.swap_comparison cmp - | (_, _) -> - ` fld {emit_reg i.arg.(0)}\n`; - push_fp(); - ` {emit_string instr}p {emit_reg i.arg.(1)}\n`; - pop_fp(); - cmp in + if i.arg.(0) = tos && i.arg.(1) = tos then begin + (* both args on top of FP stack *) + ` fcompp\n`; + cmp + end else if i.arg.(0) = tos then begin + (* first arg on top of FP stack *) + ` fcomp {emit_reg i.arg.(1)}\n`; + cmp + end else if i.arg.(1) = tos then begin + (* second arg on top of FP stack *) + ` fcomp {emit_reg i.arg.(0)}\n`; + Cmm.swap_comparison cmp + end else begin + ` fld {emit_reg i.arg.(0)}\n`; + ` fcomp {emit_reg i.arg.(1)}\n`; + cmp + end in ` fnstsw ax\n`; begin match actual_cmp with - Ceq -> - if neg then begin - ` and ah, 68\n`; - ` xor ah, 64\n`; - ` jne ` - end else begin - ` and ah, 69\n`; - ` cmp ah, 64\n`; - ` je ` - end - | Cne -> - if neg then begin - ` and ah, 69\n`; - ` cmp ah, 64\n`; - ` je ` - end else begin - ` and ah, 68\n`; - ` xor ah, 64\n`; - ` jne ` - end - | Cle -> + Cle -> ` and ah, 69\n`; ` dec ah\n`; ` cmp ah, 64\n`; @@ -703,6 +632,7 @@ let emit_instr i = if neg then ` jne ` else ` je ` + | _ -> fatal_error "Emit_i386: floattest" end; `{emit_label lbl}\n` | Ioddtest -> diff --git a/asmcomp/proc_i386.ml b/asmcomp/proc_i386.ml index ba280f9f4..b29a657a1 100644 --- a/asmcomp/proc_i386.ml +++ b/asmcomp/proc_i386.ml @@ -31,19 +31,13 @@ open Mach edi 5 ebp 6 - f0 - f3 101-104 function arguments and results - f0: C function results - not preserved by C - - The other 4 floating-point registers are treated as a stack. - We use the pseudo-register %tos (100) to represent the top of that stack. *) + tos 100 top of floating-point stack. *) let int_reg_name = [| "%eax"; "%ebx"; "%ecx"; "%edx"; "%esi"; "%edi"; "%ebp" |] let float_reg_name = - [| "%tos"; "%st(0)"; "%st(1)"; "%st(2)"; "%st(3)"; - "%st(4)"; "%st(5)"; "%st(6)"; "%st(7)" |] + [| "%tos" |] let num_register_classes = 2 @@ -53,15 +47,15 @@ let register_class r = | Addr -> 0 | Float -> 1 -let num_available_registers = [| 7; 4 |] +let num_available_registers = [| 7; 0 |] -let first_available_register = [| 0; 101 |] +let first_available_register = [| 0; 100 |] let register_name r = if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) -(* There is little scheduling, and some operations are more efficient when - %eax or %st(0) are arguments *) +(* There is little scheduling, and some operations are more compact + when their argument is %eax. *) let rotate_registers = false @@ -72,10 +66,7 @@ let hard_int_reg = for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done; v -let hard_float_reg = - let v = Array.create 5 Reg.dummy in - for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done; - v +let hard_float_reg = [| Reg.at_location Float (Reg 100) |] let all_phys_regs = Array.append hard_int_reg hard_float_reg @@ -346,23 +337,23 @@ let outgoing ofs = Outgoing ofs let not_supported ofs = fatal_error "Proc.loc_results: cannot call" let loc_arguments arg = - calling_conventions 0 5 101 104 outgoing arg + calling_conventions 0 5 100 99 outgoing arg let loc_parameters arg = - let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc + let (loc, ofs) = calling_conventions 0 5 100 99 incoming arg in loc let loc_results res = - let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc + let (loc, ofs) = calling_conventions 0 5 100 100 not_supported res in loc let extcall_use_push = true let loc_external_arguments arg = fatal_error "Proc.loc_external_arguments" let loc_external_results res = - let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc + let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc let loc_exn_bucket = eax (* Registers destroyed by operations *) let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *) - Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104]) + Array.of_list(List.map phys_reg [0;2;3]) let destroyed_at_oper = function Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs @@ -382,13 +373,13 @@ let destroyed_at_raise = all_phys_regs let safe_register_pressure op = 4 let max_register_pressure = function - Iextcall(_, _) -> [| 4; 0 |] - | Iintop(Idiv | Imod) -> [| 5; 4 |] + Iextcall(_, _) -> [| 4; max_int |] + | Iintop(Idiv | Imod) -> [| 5; max_int |] | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) | - Iintoffloat -> [| 6; 4 |] - | _ -> [|7; 4|] + Iintoffloat -> [| 6; max_int |] + | _ -> [|7; max_int |] -(* Reloading of instruction arguments, storing of instruction results *) +(* Reloading of instruction arguments, storing of instruction results. *) let stackp r = match r.loc with @@ -403,6 +394,10 @@ let reload_test makereg tst arg = else arg | _ -> arg +(* Since #floatregs = 0, pseudoregs of type float will never be reloaded. + Hence there is no need to make special cases for + floating-point operations. *) + let reload_operation makereg op arg res = match op with Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) -> @@ -411,21 +406,16 @@ let reload_operation makereg op arg res = then ([|arg.(0); makereg arg.(1)|], res) else (arg, res) | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat | - Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ispecific(Ipush) -> + Ispecific(Ipush) -> (* The argument(s) can be either in register or on stack *) (arg, res) - | Ispecific(Ifloatarithmem(_, _)) -> - (* First arg can be either in register or on stack, but remaining - arguments must be in registers *) - let newarg = Array.create (Array.length arg) arg.(0) in - for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done; - (newarg, res) | _ -> (* Other operations: all args and results in registers *) raise Use_default (* Scheduling is turned off because our model does not fit the 486 nor Pentium very well. In particular, it messes up with the - float reg stack. *) + float reg stack. The Pentium Pro schedules at run-time much better + than what we could do. *) let need_scheduling = false diff --git a/asmcomp/proc_i386nt.ml b/asmcomp/proc_i386nt.ml index 6037fe28d..0a787c9d5 100644 --- a/asmcomp/proc_i386nt.ml +++ b/asmcomp/proc_i386nt.ml @@ -31,19 +31,13 @@ open Mach edi 5 ebp 6 - f0 - f3 101-104 function arguments and results - f0: C function results - not preserved by C - - The other 4 floating-point registers are treated as a stack. - We use the pseudo-register %tos (100) to represent the top of that stack. *) + tos 100 top of floating-point stack. *) let int_reg_name = [| "eax"; "ebx"; "ecx"; "edx"; "esi"; "edi"; "ebp" |] let float_reg_name = - [| "tos"; "st(0)"; "st(1)"; "st(2)"; "st(3)"; - "st(4)"; "st(5)"; "st(6)"; "st(7)" |] + [| "tos" |] let num_register_classes = 2 @@ -53,15 +47,15 @@ let register_class r = | Addr -> 0 | Float -> 1 -let num_available_registers = [| 7; 4 |] +let num_available_registers = [| 7; 0 |] -let first_available_register = [| 0; 101 |] +let first_available_register = [| 0; 100 |] let register_name r = if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) -(* There is little scheduling, and some operations are more efficient when - eax or st(0) are arguments *) +(* There is little scheduling, and some operations are more compact + when their argument is %eax. *) let rotate_registers = false @@ -72,10 +66,7 @@ let hard_int_reg = for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done; v -let hard_float_reg = - let v = Array.create 5 Reg.dummy in - for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done; - v +let hard_float_reg = [| Reg.at_location Float (Reg 100) |] let all_phys_regs = Array.append hard_int_reg hard_float_reg @@ -346,23 +337,23 @@ let outgoing ofs = Outgoing ofs let not_supported ofs = fatal_error "Proc.loc_results: cannot call" let loc_arguments arg = - calling_conventions 0 5 101 104 outgoing arg + calling_conventions 0 5 100 99 outgoing arg let loc_parameters arg = - let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc + let (loc, ofs) = calling_conventions 0 5 100 99 incoming arg in loc let loc_results res = - let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc + let (loc, ofs) = calling_conventions 0 5 100 100 not_supported res in loc let extcall_use_push = true let loc_external_arguments arg = fatal_error "Proc.loc_external_arguments" let loc_external_results res = - let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc + let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc let loc_exn_bucket = eax (* Registers destroyed by operations *) let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *) - Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104]) + Array.of_list(List.map phys_reg [0;2;3]) let destroyed_at_oper = function Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs @@ -382,11 +373,11 @@ let destroyed_at_raise = all_phys_regs let safe_register_pressure op = 4 let max_register_pressure = function - Iextcall(_, _) -> [| 4; 0 |] - | Iintop(Idiv | Imod) -> [| 5; 4 |] + Iextcall(_, _) -> [| 4; max_int |] + | Iintop(Idiv | Imod) -> [| 5; max_int |] | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) | - Iintoffloat -> [| 6; 4 |] - | _ -> [|7; 4|] + Iintoffloat -> [| 6; max_int |] + | _ -> [|7; max_int |] (* Reloading of instruction arguments, storing of instruction results *) @@ -403,6 +394,10 @@ let reload_test makereg tst arg = else arg | _ -> arg +(* Since #floatregs = 0, pseudoregs of type float will never be reloaded. + Hence there is no need to make special cases for + floating-point operations. *) + let reload_operation makereg op arg res = match op with Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) -> @@ -411,15 +406,9 @@ let reload_operation makereg op arg res = then ([|arg.(0); makereg arg.(1)|], res) else (arg, res) | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat | - Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ispecific(Ipush) -> + Ispecific(Ipush) -> (* The argument(s) can be either in register or on stack *) (arg, res) - | Ispecific(Ifloatarithmem(_, _)) -> - (* First arg can be either in register or on stack, but remaining - arguments must be in registers *) - let newarg = Array.create (Array.length arg) arg.(0) in - for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done; - (newarg, res) | _ -> (* Other operations: all args and results in registers *) raise Use_default diff --git a/asmcomp/reload.ml b/asmcomp/reload.ml index 7c559b2dc..c57631d1d 100644 --- a/asmcomp/reload.ml +++ b/asmcomp/reload.ml @@ -33,7 +33,10 @@ let makereg r = match r.loc with Unknown -> fatal_error "Reload.makereg" | Reg _ -> r - | Stack _ -> redo_regalloc := true; Reg.clone r + | Stack _ -> + if Proc.num_available_registers.(Proc.register_class r) = 0 + then r + else begin redo_regalloc := true; Reg.clone r end let makeregs rv = let n = Array.length rv in diff --git a/asmrun/i386.S b/asmrun/i386.S index 4902addde..7980afbce 100644 --- a/asmrun/i386.S +++ b/asmrun/i386.S @@ -138,16 +138,10 @@ L103: subl G(young_ptr), %eax /* eax = - size */ .align FUNCTION_ALIGN G(caml_c_call): /* Record lowest stack address and return address */ - /* In parallel, free the floating point registers */ - /* (Pairing is expected on the Pentium.) */ movl (%esp), %edx - ffree %st(0) movl %edx, G(caml_last_return_address) - ffree %st(1) leal 4(%esp), %edx - ffree %st(2) movl %edx, G(caml_bottom_of_stack) - ffree %st(3) /* Call the function (address in %eax) */ jmp *%eax @@ -223,17 +217,11 @@ L107: used by caml_c_call */ popl G(caml_bottom_of_stack) popl G(caml_last_return_address) - /* Restore callee-save registers. - In parallel, free the floating-point registers - that may have been used by Caml. */ + /* Restore callee-save registers. */ popl %ebp - ffree %st(0) popl %edi - ffree %st(1) popl %esi - ffree %st(2) popl %ebx - ffree %st(3) /* Return to caller. */ ret L108: diff --git a/asmrun/i386nt.asm b/asmrun/i386nt.asm index fbd3277e4..4eaa7ba15 100644 --- a/asmrun/i386nt.asm +++ b/asmrun/i386nt.asm @@ -48,6 +48,32 @@ _caml_exception_pointer DWORD 0 PUBLIC _caml_alloc PUBLIC _caml_call_gc +_caml_call_gc: + ; Record lowest stack address and return address + mov eax, [esp] + mov _caml_last_return_address, eax + lea eax, [esp+4] + mov _caml_bottom_of_stack, eax + ; Save all regs used by the code generator +L105: mov _gc_entry_regs + 4, ebx + mov _gc_entry_regs + 8, ecx + mov _gc_entry_regs + 12, edx + mov _gc_entry_regs + 16, esi + mov _gc_entry_regs + 20, edi + mov _gc_entry_regs + 24, ebp + ; Call the garbage collector + call _garbage_collection + ; Restore all regs used by the code generator + mov ebx, _gc_entry_regs + 4 + mov ecx, _gc_entry_regs + 8 + mov edx, _gc_entry_regs + 12 + mov esi, _gc_entry_regs + 16 + mov edi, _gc_entry_regs + 20 + mov ebp, _gc_entry_regs + 24 + ; Return to caller + push _caml_last_return_address + ret + ALIGN 4 _caml_alloc1: mov eax, _young_ptr @@ -56,8 +82,12 @@ _caml_alloc1: cmp eax, _young_limit jb L100 ret -L100: mov eax, 8 - jmp L105 +L100: mov eax, [esp] + mov _caml_last_return_address, eax + lea eax, [esp+4] + mov _caml_bottom_of_stack, eax + call L105 + jmp _caml_alloc1 ALIGN 4 _caml_alloc2: @@ -67,8 +97,12 @@ _caml_alloc2: cmp eax, _young_limit jb L101 ret -L101: mov eax, 12 - jmp L105 +L101: mov eax, [esp] + mov _caml_last_return_address, eax + lea eax, [esp+4] + mov _caml_bottom_of_stack, eax + call L105 + jmp _caml_alloc2 ALIGN 4 _caml_alloc3: @@ -78,59 +112,31 @@ _caml_alloc3: cmp eax, _young_limit jb L102 ret -L102: mov eax, 16 - jmp L105 +L102: mov eax, [esp] + mov _caml_last_return_address, eax + lea eax, [esp+4] + mov _caml_bottom_of_stack, eax + call L105 + jmp _caml_alloc3 ALIGN 4 _caml_alloc: - push eax - mov eax, _young_ptr - sub eax, [esp] - mov _young_ptr, eax - cmp eax, _young_limit - jb L103 - add esp, 4 - ret -L103: pop eax - jmp L105 - -_caml_call_gc: - ; Adjust return address and recover desired size in eax - pop eax - add eax, 2 - push eax - movzx eax, WORD PTR [eax-2] -L105: - ; Record lowest stack address and return address - pop _caml_last_return_address - mov _caml_bottom_of_stack, esp - ; Save all regs used by the code generator - mov _gc_entry_regs + 4, ebx - mov _gc_entry_regs + 8, ecx - mov _gc_entry_regs + 12, edx - mov _gc_entry_regs + 16, esi - mov _gc_entry_regs + 20, edi - mov _gc_entry_regs + 24, ebp - ; Save desired size - push eax - ; Call the garbage collector - call _garbage_collection - ; Restore all regs used by the code generator - mov ebx, _gc_entry_regs + 4 - mov ecx, _gc_entry_regs + 8 - mov edx, _gc_entry_regs + 12 - mov esi, _gc_entry_regs + 16 - mov edi, _gc_entry_regs + 20 - mov ebp, _gc_entry_regs + 24 - ; Recover desired size - pop eax - ; Decrement young_ptr by desired size - sub _young_ptr, eax - ; Reload result of allocation in %eax - mov eax, _young_ptr - ; Return to caller - push _caml_last_return_address - ret + sub eax, _young_ptr ; eax = size - young_ptr + neg eax ; eax = young_ptr - size + cmp eax, _young_limit + jb L103 + mov _young_ptr, eax + ret +L103: sub eax, _young_ptr ; eax = - size + neg eax ; eax = size + push eax ; save desired size + mov eax, [esp+4] + mov _caml_last_return_address, eax + lea eax, [esp+8] + mov _caml_bottom_of_stack, eax + call L105 + pop eax ; recover desired size + jmp _caml_alloc ; Call a C function from Caml @@ -138,16 +144,10 @@ L105: ALIGN 4 _caml_c_call: ; Record lowest stack address and return address - ; In parallel, free the floating point registers - ; (Pairing is expected on the Pentium.) mov edx, [esp] - ffree st(0) mov _caml_last_return_address, edx - ffree st(1) lea edx, [esp+4] - ffree st(2) mov _caml_bottom_of_stack, edx - ffree st(3) ; Call the function (address in %eax) jmp eax @@ -224,16 +224,10 @@ L107: pop _caml_bottom_of_stack pop _caml_last_return_address ; Restore callee-save registers. - ; In parallel, free the floating-point registers - ; that may have been used by Caml. pop ebp - ffree st(0) pop edi - ffree st(1) pop esi - ffree st(2) pop ebx - ffree st(3) ; Return to caller. ret L108: |