summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@inria.fr>1996-05-16 19:39:26 +0000
committerXavier Leroy <xavier.leroy@inria.fr>1996-05-16 19:39:26 +0000
commit2a309d688f7d6e673563ab11c06fe4e294cc4bfa (patch)
tree42de46da0f07425f1f041cad8755eb2401324767
parent064465320e18f583d63fe6c008ab9b8c4c22b0ae (diff)
Ne plus utiliser de registres flottants
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@825 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r--asmcomp/coloring.ml129
-rw-r--r--asmcomp/emit_i386.mlp254
-rw-r--r--asmcomp/emit_i386nt.mlp222
-rw-r--r--asmcomp/proc_i386.ml58
-rw-r--r--asmcomp/proc_i386nt.ml53
-rw-r--r--asmcomp/reload.ml5
-rw-r--r--asmrun/i386.S14
-rw-r--r--asmrun/i386nt.asm126
8 files changed, 347 insertions, 514 deletions
diff --git a/asmcomp/coloring.ml b/asmcomp/coloring.ml
index 74cfbb632..5ba3dc50c 100644
--- a/asmcomp/coloring.ml
+++ b/asmcomp/coloring.ml
@@ -46,15 +46,22 @@ let constrained = ref Reg.Set.empty
let find_degree reg =
if reg.spill then () else begin
- let deg = ref 0 in
let cl = Proc.register_class reg in
- List.iter
- (fun r -> if not r.spill & Proc.register_class r = cl then incr deg)
- reg.interf;
- reg.degree <- !deg;
- if !deg >= Proc.num_available_registers.(cl)
- then constrained := Reg.Set.add reg !constrained
- else unconstrained := Reg.Set.add reg !unconstrained
+ let avail_regs = Proc.num_available_registers.(cl) in
+ if avail_regs = 0 then
+ (* Don't bother computing the degree if there are no regs
+ in this class *)
+ unconstrained := Reg.Set.add reg !unconstrained
+ else begin
+ let deg = ref 0 in
+ List.iter
+ (fun r -> if not r.spill & Proc.register_class r = cl then incr deg)
+ reg.interf;
+ reg.degree <- !deg;
+ if !deg >= avail_regs
+ then constrained := Reg.Set.add reg !constrained
+ else unconstrained := Reg.Set.add reg !unconstrained
+ end
end
(* Remove a register from the interference graph *)
@@ -137,60 +144,62 @@ let assign_location reg =
let num_regs = Proc.num_available_registers.(cl) in
let last_reg = first_reg + num_regs in
let score = Array.create num_regs 0 in
- (* Favor the registers that have been assigned to pseudoregs for which
- we have a preference. If these pseudoregs have not been assigned
- already, avoid the registers with which they conflict. *)
- iter_preferred
- (fun r w ->
- match r.loc with
- Reg n -> if n >= first_reg & n < last_reg then
- score.(n - first_reg) <- score.(n - first_reg) + w
- | Unknown ->
- List.iter
- (fun neighbour ->
- match neighbour.loc with
- Reg n -> if n >= first_reg & n < last_reg then
- score.(n - first_reg) <- score.(n - first_reg) - w
- | _ -> ())
- r.interf
- | _ -> ())
- reg;
- List.iter
- (fun neighbour ->
- (* Prohibit the registers that have been assigned
- to our neighbours *)
- begin match neighbour.loc with
- Reg n -> if n >= first_reg & n < last_reg then
- score.(n - first_reg) <- (-1000000)
- | _ -> ()
- end;
- (* Avoid the registers that have been assigned to pseudoregs
- for which our neighbours have a preference *)
- iter_preferred
- (fun r w ->
- match r.loc with
- Reg n -> if n >= first_reg & n < last_reg then
- score.(n - first_reg) <- score.(n - first_reg) - (w - 1)
- (* w-1 to break the symmetry when two conflicting regs
- have the same preference for a third reg. *)
- | _ -> ())
- neighbour)
- reg.interf;
- (* Pick the register with the best score *)
let best_score = ref (-1000000) and best_reg = ref (-1) in
let start = start_register.(cl) in
- for n = start to num_regs - 1 do
- if score.(n) > !best_score then begin
- best_score := score.(n);
- best_reg := n
- end
- done;
- for n = 0 to start - 1 do
- if score.(n) > !best_score then begin
- best_score := score.(n);
- best_reg := n
- end
- done;
+ if num_regs > 0 then begin
+ (* Favor the registers that have been assigned to pseudoregs for which
+ we have a preference. If these pseudoregs have not been assigned
+ already, avoid the registers with which they conflict. *)
+ iter_preferred
+ (fun r w ->
+ match r.loc with
+ Reg n -> if n >= first_reg & n < last_reg then
+ score.(n - first_reg) <- score.(n - first_reg) + w
+ | Unknown ->
+ List.iter
+ (fun neighbour ->
+ match neighbour.loc with
+ Reg n -> if n >= first_reg & n < last_reg then
+ score.(n - first_reg) <- score.(n - first_reg) - w
+ | _ -> ())
+ r.interf
+ | _ -> ())
+ reg;
+ List.iter
+ (fun neighbour ->
+ (* Prohibit the registers that have been assigned
+ to our neighbours *)
+ begin match neighbour.loc with
+ Reg n -> if n >= first_reg & n < last_reg then
+ score.(n - first_reg) <- (-1000000)
+ | _ -> ()
+ end;
+ (* Avoid the registers that have been assigned to pseudoregs
+ for which our neighbours have a preference *)
+ iter_preferred
+ (fun r w ->
+ match r.loc with
+ Reg n -> if n >= first_reg & n < last_reg then
+ score.(n - first_reg) <- score.(n - first_reg) - (w - 1)
+ (* w-1 to break the symmetry when two conflicting regs
+ have the same preference for a third reg. *)
+ | _ -> ())
+ neighbour)
+ reg.interf;
+ (* Pick the register with the best score *)
+ for n = start to num_regs - 1 do
+ if score.(n) > !best_score then begin
+ best_score := score.(n);
+ best_reg := n
+ end
+ done;
+ for n = 0 to start - 1 do
+ if score.(n) > !best_score then begin
+ best_score := score.(n);
+ best_reg := n
+ end
+ done
+ end;
(* Found a register? *)
if !best_reg >= 0 then begin
reg.loc <- Reg(first_reg + !best_reg);
diff --git a/asmcomp/emit_i386.mlp b/asmcomp/emit_i386.mlp
index 5ff2ca0d3..a6e78d5f5 100644
--- a/asmcomp/emit_i386.mlp
+++ b/asmcomp/emit_i386.mlp
@@ -71,23 +71,10 @@ let emit_align =
"linux_elf" -> (fun n -> ` .align {emit_int n}\n`)
| _ -> (fun n -> ` .align {emit_int(Misc.log2 n)}\n`)
-(* Track the position of the floating-point stack *)
-
-let fp_offset = ref 0
-
-let push_fp () =
- incr fp_offset;
- if !fp_offset > 4 then fatal_error "Emit: float expression too complex"
-
-let pop_fp () =
- decr fp_offset
-
(* Output a pseudo-register *)
let emit_reg = function
- { loc = Reg r; typ = Float } ->
- emit_string (register_name(r + !fp_offset))
- | { loc = Reg r } ->
+ { loc = Reg r } ->
emit_string (register_name r)
| { loc = Stack s } as r ->
let ofs = slot_offset s (register_class r) in
@@ -122,17 +109,6 @@ let register_overlap reg arr =
with Exit ->
true
-(* Check if a set of registers contains a float *)
-
-let contains_floats arr =
- try
- for i = 0 to Array.length arr - 1 do
- if arr.(i).typ = Float then raise Exit
- done;
- false
- with Exit ->
- true
-
(* Output an addressing mode *)
let emit_addressing addr r n =
@@ -153,13 +129,6 @@ let emit_addressing addr r n =
if d <> 0 then emit_int d;
`({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})`
-(* Emit the operand of a floating-point operation *)
-
-let emit_float_operand r =
- match r.loc with
- Stack s -> `l {emit_reg r}`
- | _ -> ` {emit_reg r}`
-
(* Record live pointers at call points *)
type frame_descr =
@@ -229,21 +198,21 @@ let instr_for_intop = function
let instr_for_floatop = function
Inegf -> "fchs"
| Iabsf -> "fabs"
- | Iaddf -> "fadd"
- | Isubf -> "fsub"
- | Imulf -> "fmul"
- | Idivf -> "fdiv"
- | Ispecific Isubfrev -> "fsubr"
- | Ispecific Idivfrev -> "fdivr"
+ | Iaddf -> "faddl"
+ | Isubf -> "fsubl"
+ | Imulf -> "fmull"
+ | Idivf -> "fdivl"
+ | Ispecific Isubfrev -> "fsubrl"
+ | Ispecific Idivfrev -> "fdivrl"
| _ -> fatal_error "Emit_i386: instr_for_floatop"
let instr_for_floatop_reversed = function
- Iaddf -> "fadd"
- | Isubf -> "fsubr"
- | Imulf -> "fmul"
- | Idivf -> "fdivr"
- | Ispecific Isubfrev -> "fsub"
- | Ispecific Idivfrev -> "fdiv"
+ Iaddf -> "faddl"
+ | Isubf -> "fsubrl"
+ | Imulf -> "fmull"
+ | Idivf -> "fdivrl"
+ | Ispecific Isubfrev -> "fsubl"
+ | Ispecific Idivfrev -> "fdivl"
| _ -> fatal_error "Emit_i386: instr_for_floatop_reversed"
let instr_for_floatop_pop = function
@@ -295,27 +264,23 @@ let range_check_trap = ref 0
let float_constants = ref ([] : (int * string) list)
+let tos = phys_reg 100
+
let emit_instr i =
match i.desc with
Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
- if i.arg.(0).loc <> i.res.(0).loc then begin
- match i.arg.(0).typ with
- Int | Addr ->
- ` movl {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
- | Float ->
- begin match i.arg.(0).loc with
- Reg 100 -> (* top of FP stack *)
- ` fstpl {emit_reg i.res.(0)}\n`;
- pop_fp()
- | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
- ` fstl {emit_reg i.res.(0)}\n`
- | _ ->
- ` fldl {emit_reg i.arg.(0)}\n`;
- push_fp();
- ` fstpl {emit_reg i.res.(0)}\n`;
- pop_fp()
- end
+ let src = i.arg.(0) and dst = i.res.(0) in
+ if src.loc <> dst.loc then begin
+ if src.typ = Float then
+ if src = tos then
+ ` fstpl {emit_reg dst}\n`
+ else begin
+ ` fldl {emit_reg src}\n`;
+ ` fstpl {emit_reg dst}\n`
+ end
+ else
+ ` movl {emit_reg src}, {emit_reg dst}\n`
end
| Lop(Iconst_int 0) ->
begin match i.res.(0).loc with
@@ -334,8 +299,7 @@ let emit_instr i =
let lbl = new_label() in
float_constants := (lbl, s) :: !float_constants;
` fldl {emit_label lbl}\n`
- end;
- push_fp()
+ end
| Lop(Iconst_symbol s) ->
` movl ${emit_symbol s}, {emit_reg i.res.(0)}\n`
| Lop(Icall_ind) ->
@@ -360,12 +324,6 @@ let emit_instr i =
` call {emit_symbol "caml_c_call"}\n`;
record_frame i.live
end else begin
- if contains_floats i.arg or contains_floats i.res then begin
- ` ffree %st(0)\n`;
- ` ffree %st(1)\n`;
- ` ffree %st(2)\n`;
- ` ffree %st(3)\n`
- end;
` call {emit_symbol s}\n`
end
| Lop(Istackoffset n) ->
@@ -396,23 +354,18 @@ let emit_instr i =
` movswl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
end
| Float ->
- ` fldl {emit_addressing addr i.arg 0}\n`;
- push_fp()
+ ` fldl {emit_addressing addr i.arg 0}\n`
end
| Lop(Istore(Word, addr)) ->
begin match i.arg.(0).typ with
Int | Addr ->
` movl {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
| Float ->
- begin match i.arg.(0).loc with
- Reg 100 -> (* top of FP stack *)
- ` fstpl {emit_addressing addr i.arg 1}\n`;
- pop_fp()
- | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
- ` fstl {emit_addressing addr i.arg 1}\n`
- | _ ->
- ` fldl {emit_reg i.arg.(0)}\n`;
- ` fstpl {emit_addressing addr i.arg 1}\n`
+ if i.arg.(0) = tos then
+ ` fstpl {emit_addressing addr i.arg 1}\n`
+ else begin
+ ` fldl {emit_reg i.arg.(0)}\n`;
+ ` fstpl {emit_addressing addr i.arg 1}\n`
end
end
| Lop(Istore(chunk, addr)) ->
@@ -500,25 +453,24 @@ let emit_instr i =
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n`
| Lop(Inegf | Iabsf as floatop) ->
- if i.arg.(0).loc <> Reg 100 then begin
+ if i.arg.(0) <> tos then
` fldl {emit_reg i.arg.(0)}\n`;
- push_fp()
- end;
` {emit_string(instr_for_floatop floatop)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
as floatop) ->
- begin match (i.arg.(0).loc, i.arg.(1).loc) with
- (Reg 100, Reg 100) -> (* both operands on top of FP stack *)
- ` {emit_string(instr_for_floatop_pop floatop)} %st(0), %st(1)\n`;
- pop_fp()
- | (Reg 100, _) -> (* first operand on stack *)
- ` {emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n`
- | (_, Reg 100) -> (* second operand on stack *)
- ` {emit_string(instr_for_floatop_reversed floatop)}{emit_float_operand i.arg.(0)}\n`
- | (_, _) -> (* both in regs or on stack *)
- ` fldl {emit_reg i.arg.(0)}\n`;
- push_fp();
- ` {emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n`
+ if i.arg.(0) = tos && i.arg.(1) = tos then
+ (* both operands on top of FP stack *)
+ ` {emit_string(instr_for_floatop_pop floatop)} %st(0), %st(1)\n`
+ else if i.arg.(0) = tos then
+ (* first operand on stack *)
+ ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
+ else if i.arg.(1) = tos then
+ (* second operand on stack *)
+ ` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
+ else begin
+ (* both operands in memory *)
+ ` fldl {emit_reg i.arg.(0)}\n`;
+ ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
end
| Lop(Ifloatofint) ->
begin match i.arg.(0).loc with
@@ -528,13 +480,10 @@ let emit_instr i =
` pushl {emit_reg i.arg.(0)}\n`;
` fildl (%esp)\n`;
` addl $4, %esp\n`
- end;
- push_fp()
+ end
| Lop(Iintoffloat) ->
- if i.arg.(0).loc <> Reg 100 then begin
+ if i.arg.(0) <> tos then
` fldl {emit_reg i.arg.(0)}\n`;
- push_fp()
- end;
stack_offset := !stack_offset - 8;
` subl $8, %esp\n`;
` fnstcw 4(%esp)\n`;
@@ -549,7 +498,6 @@ let emit_instr i =
` fistpl (%esp)\n`;
` movl (%esp), {emit_reg i.res.(0)}\n`
end;
- pop_fp();
` fldcw 4(%esp)\n`;
` addl $8, %esp\n`;
stack_offset := !stack_offset + 8
@@ -566,22 +514,14 @@ let emit_instr i =
for n = Array.length i.arg - 1 downto 0 do
let r = i.arg.(n) in
match r with
- {loc = Reg rn; typ = Float} ->
+ {loc = Reg _; typ = Float} ->
` subl $8, %esp\n`;
- stack_offset := !stack_offset + 8;
- begin match rn with
- 100 ->
- ` fstpl 0(%esp)\n`;
- pop_fp()
- | 101 when !fp_offset = 0 ->
- ` fstl 0(%esp)\n`
- | _ ->
- ` fldl {emit_reg r}\n`;
- ` fstpl 0(%esp)\n`
- end
+ ` fstpl 0(%esp)\n`;
+ stack_offset := !stack_offset + 8
| {loc = Stack sl; typ = Float} ->
- ` pushl 4+{emit_reg r}\n`;
- ` pushl 4+{emit_reg r}\n`;
+ let ofs = slot_offset sl 1 in
+ ` pushl {emit_int(ofs + 4)}(%esp)\n`;
+ ` pushl {emit_int(ofs + 4)}(%esp)\n`;
stack_offset := !stack_offset + 8
| _ ->
` pushl {emit_reg r}\n`;
@@ -601,10 +541,8 @@ let emit_instr i =
` pushl {emit_addressing addr i.arg 0}\n`;
stack_offset := !stack_offset + 4
| Lop(Ispecific(Ifloatarithmem(op, addr))) ->
- if i.arg.(0).loc <> Reg 100 then begin
+ if i.arg.(0) <> tos then
` fldl {emit_reg i.arg.(0)}\n`;
- push_fp()
- end;
` {emit_string(instr_for_floatarithmem op)} {emit_addressing addr i.arg 1}\n`
| Lreloadretaddr ->
()
@@ -636,54 +574,45 @@ let emit_instr i =
` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
let b = name_for_cond_branch cmp in
` j{emit_string b} {emit_label lbl}\n`
+ | Ifloattest((Ceq | Cne as cmp), neg) ->
+ if i.arg.(1) <> tos then
+ ` fldl {emit_reg i.arg.(1)}\n`;
+ if i.arg.(0) <> tos then
+ ` fldl {emit_reg i.arg.(0)}\n`;
+ ` fucompp\n`;
+ ` fnstsw %ax\n`;
+ let neg1 = if cmp = Ceq then neg else not neg in
+ if neg1 then begin (* branch if different *)
+ ` andb $68, %ah\n`;
+ ` xorb $64, %ah\n`;
+ ` jne {emit_label lbl}\n`
+ end else begin (* branch if equal *)
+ ` andb $69, %ah\n`;
+ ` cmpb $64, %ah\n`;
+ ` je {emit_label lbl}\n`
+ end
| Ifloattest(cmp, neg) ->
- let instr =
- match cmp with
- Ceq | Cne -> "fucom"
- | _ -> "fcom" in
let actual_cmp =
- match (i.arg.(0).loc, i.arg.(1).loc) with
- (Reg 100, Reg 100) -> (* both args on top of FP stack *)
- ` {emit_string instr}pp\n`;
- fp_offset := !fp_offset - 2;
- cmp
- | (Reg 100, _) -> (* first arg on top of FP stack *)
- ` {emit_string instr}p{emit_float_operand i.arg.(1)}\n`;
- pop_fp();
- cmp
- | (_, Reg 100) -> (* second arg on top of FP stack *)
- ` {emit_string instr}p{emit_float_operand i.arg.(0)}\n`;
- pop_fp();
- Cmm.swap_comparison cmp
- | (_, _) ->
- ` fldl {emit_reg i.arg.(0)}\n`;
- push_fp();
- ` {emit_string instr}p{emit_float_operand i.arg.(1)}\n`;
- pop_fp();
- cmp in
+ if i.arg.(0) = tos && i.arg.(1) = tos then begin
+ (* both args on top of FP stack *)
+ ` fcompp\n`;
+ cmp
+ end else if i.arg.(0) = tos then begin
+ (* first arg on top of FP stack *)
+ ` fcompl {emit_reg i.arg.(1)}\n`;
+ cmp
+ end else if i.arg.(1) = tos then begin
+ (* second arg on top of FP stack *)
+ ` fcompl {emit_reg i.arg.(0)}\n`;
+ Cmm.swap_comparison cmp
+ end else begin
+ ` fldl {emit_reg i.arg.(0)}\n`;
+ ` fcompl {emit_reg i.arg.(1)}\n`;
+ cmp
+ end in
` fnstsw %ax\n`;
begin match actual_cmp with
- Ceq ->
- if neg then begin
- ` andb $68, %ah\n`;
- ` xorb $64, %ah\n`;
- ` jne `
- end else begin
- ` andb $69, %ah\n`;
- ` cmpb $64, %ah\n`;
- ` je `
- end
- | Cne ->
- if neg then begin
- ` andb $69, %ah\n`;
- ` cmpb $64, %ah\n`;
- ` je `
- end else begin
- ` andb $68, %ah\n`;
- ` xorb $64, %ah\n`;
- ` jne `
- end
- | Cle ->
+ Cle ->
` andb $69, %ah\n`;
` decb %ah\n`;
` cmpb $64, %ah\n`;
@@ -706,6 +635,7 @@ let emit_instr i =
if neg
then ` jne `
else ` je `
+ | _ -> fatal_error "Emit_i386: floattest"
end;
`{emit_label lbl}\n`
| Ioddtest ->
diff --git a/asmcomp/emit_i386nt.mlp b/asmcomp/emit_i386nt.mlp
index 0592fc9a1..e709f4bf6 100644
--- a/asmcomp/emit_i386nt.mlp
+++ b/asmcomp/emit_i386nt.mlp
@@ -45,7 +45,8 @@ let slot_offset loc cl =
else !stack_offset + num_stack_slots.(0) * 4 + n * 8
| Outgoing n -> n
-(* Record symbols used and defined - at the end generate extern for those used but not defined *)
+(* Record symbols used and defined - at the end generate extern for those
+ used but not defined *)
let symbols_defined = ref StringSet.empty
let symbols_used = ref StringSet.empty
@@ -68,23 +69,10 @@ let emit_label lbl =
let emit_align n = ` ALIGN {emit_int n}\n`
-(* Track the position of the floating-point stack *)
-
-let fp_offset = ref 0
-
-let push_fp () =
- incr fp_offset;
- if !fp_offset > 4 then fatal_error "Emit: float expression too complex"
-
-let pop_fp () =
- decr fp_offset
-
(* Output a pseudo-register *)
let emit_reg = function
- { loc = Reg r; typ = Float } ->
- emit_string (register_name(r + !fp_offset))
- | { loc = Reg r } ->
+ { loc = Reg r } ->
emit_string (register_name r)
| { loc = Stack s; typ = Float } as r ->
let ofs = slot_offset s (register_class r) in
@@ -122,17 +110,6 @@ let register_overlap reg arr =
with Exit ->
true
-(* Check if a set of registers contains a float *)
-
-let contains_floats arr =
- try
- for i = 0 to Array.length arr - 1 do
- if arr.(i).typ = Float then raise Exit
- done;
- false
- with Exit ->
- true
-
(* Output an addressing mode *)
let emit_signed_int d =
@@ -153,13 +130,6 @@ let emit_addressing addr r n =
| Iindexed2scaled(scale, d) ->
`[{emit_reg r.(n)}+{emit_reg r.(n+1)}*{emit_int scale}{emit_signed_int d}]`
-(* Emit the operand of a floating-point operation *)
-
-let emit_float_operand r =
- match r.loc with
- Reg _ -> `st, {emit_reg r}`
- | _ -> `{emit_reg r}`
-
(* Record live pointers at call points *)
type frame_descr =
@@ -286,27 +256,23 @@ let range_check_trap = ref 0
let float_constants = ref ([] : (int * string) list)
+let tos = phys_reg 100
+
let emit_instr i =
match i.desc with
Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
- if i.arg.(0).loc <> i.res.(0).loc then begin
- match i.arg.(0).typ with
- Int | Addr ->
- ` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
- | Float ->
- begin match i.arg.(0).loc with
- Reg 100 -> (* top of FP stack *)
- ` fstp {emit_reg i.res.(0)}\n`;
- pop_fp()
- | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
- ` fst {emit_reg i.res.(0)}\n`
- | _ ->
- ` fld {emit_reg i.arg.(0)}\n`;
- push_fp();
- ` fstp {emit_reg i.res.(0)}\n`;
- pop_fp()
- end
+ let src = i.arg.(0) and dst = i.res.(0) in
+ if src.loc <> dst.loc then begin
+ if src.typ = Float then
+ if src = tos then
+ ` fstp {emit_reg dst}\n`
+ else begin
+ ` fld {emit_reg src}\n`;
+ ` fstp {emit_reg dst}\n`
+ end
+ else
+ ` movl {emit_reg dst}, {emit_reg src}\n`
end
| Lop(Iconst_int 0) ->
begin match i.res.(0).loc with
@@ -325,8 +291,7 @@ let emit_instr i =
let lbl = new_label() in
float_constants := (lbl, s) :: !float_constants;
` fld {emit_label lbl}\n`
- end;
- push_fp()
+ end
| Lop(Iconst_symbol s) ->
add_used_symbol s;
` mov {emit_reg i.res.(0)}, OFFSET {emit_symbol s}\n`
@@ -355,12 +320,6 @@ let emit_instr i =
` call _caml_c_call\n`;
record_frame i.live
end else begin
- if contains_floats i.arg or contains_floats i.res then begin
- ` ffree st(0)\n`;
- ` ffree st(1)\n`;
- ` ffree st(2)\n`;
- ` ffree st(3)\n`
- end;
` call {emit_symbol s}\n`
end
| Lop(Istackoffset n) ->
@@ -391,21 +350,16 @@ let emit_instr i =
` movsx {emit_reg dest}, WORD PTR {emit_addressing addr i.arg 0}\n`
end
| Float ->
- ` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`;
- push_fp()
+ ` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`
end
| Lop(Istore(Word, addr)) ->
begin match i.arg.(0).typ with
Int | Addr ->
` mov DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
| Float ->
- begin match i.arg.(0).loc with
- Reg 100 -> (* top of FP stack *)
- ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`;
- pop_fp()
- | Reg 101 when !fp_offset = 0 -> (* %st(0) *)
- ` fst REAL8 PTR {emit_addressing addr i.arg 1}\n`
- | _ ->
+ if i.arg.(0) = tos then
+ ` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
+ else begin
` fld {emit_reg i.arg.(0)}\n`;
` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
end
@@ -495,25 +449,24 @@ let emit_instr i =
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n`
| Lop(Inegf | Iabsf as floatop) ->
- if i.arg.(0).loc <> Reg 100 then begin
+ if i.arg.(0) <> tos then begin
` fld {emit_reg i.arg.(0)}\n`;
- push_fp()
- end;
` {emit_string(instr_for_floatop floatop)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
as floatop) ->
- begin match (i.arg.(0).loc, i.arg.(1).loc) with
- (Reg 100, Reg 100) -> (* both operands on top of FP stack *)
- ` {emit_string(instr_for_floatop_reversed floatop)}\n`;
- pop_fp()
- | (Reg 100, _) -> (* first operand on stack *)
- ` {emit_string(instr_for_floatop floatop)} {emit_float_operand i.arg.(1)}\n`
- | (_, Reg 100) -> (* second operand on stack *)
- ` {emit_string(instr_for_floatop_reversed floatop)} {emit_float_operand i.arg.(0)}\n`
- | (_, _) -> (* both in regs or on stack *)
- ` fld {emit_reg i.arg.(0)}\n`;
- push_fp();
- ` {emit_string(instr_for_floatop floatop)} {emit_float_operand i.arg.(1)}\n`
+ if i.arg.(0) = tos && i.arg.(1) = tos then
+ (* both operands on top of FP stack *)
+ ` {emit_string(instr_for_floatop_reversed floatop)}\n`
+ else if i.arg.(0) = tos then
+ (* first operand on stack *)
+ ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
+ else if i.arg.(1) = tos then
+ (* second operand on stack *)
+ ` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
+ else begin
+ (* both operands in memory *)
+ ` fld {emit_reg i.arg.(0)}\n`;
+ ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
end
| Lop(Ifloatofint) ->
begin match i.arg.(0).loc with
@@ -523,13 +476,10 @@ let emit_instr i =
` push {emit_reg i.arg.(0)}\n`;
` fild DWORD PTR [esp]\n`;
` add esp, 4\n`
- end;
- push_fp()
+ end
| Lop(Iintoffloat) ->
- if i.arg.(0).loc <> Reg 100 then begin
+ if i.arg.(0) <> tos then
` fld {emit_reg i.arg.(0)}\n`;
- push_fp()
- end;
stack_offset := !stack_offset - 8;
` sub esp, 8\n`;
` fnstcw [esp+4]\n`;
@@ -544,7 +494,6 @@ let emit_instr i =
` fistp DWORD PTR [esp]\n`;
` mov {emit_reg i.res.(0)}, [esp]\n`
end;
- pop_fp();
` fldcw [esp+4]\n`;
` add esp, 8\n`;
stack_offset := !stack_offset + 8
@@ -564,17 +513,8 @@ let emit_instr i =
match r with
{loc = Reg rn; typ = Float} ->
` sub esp, 8\n`;
- stack_offset := !stack_offset + 8;
- begin match rn with
- 100 ->
- ` fstp REAL8 PTR 0[esp]\n`;
- pop_fp()
- | 101 when !fp_offset = 0 ->
- ` fst REAL8 PTR 0[esp]\n`
- | _ ->
- ` fld {emit_reg r}\n`;
- ` fstp REAL8 PTR 0[esp]\n`
- end
+ ` fstp REAL8 PTR 0[esp]\n`;
+ stack_offset := !stack_offset + 8
| {loc = Stack sl; typ = Float} ->
let ofs = slot_offset sl 1 in
` push DWORD PTR {emit_int (ofs + 4)}[esp]\n`;
@@ -598,10 +538,8 @@ let emit_instr i =
` push DWORD PTR {emit_addressing addr i.arg 0}\n`;
stack_offset := !stack_offset + 4
| Lop(Ispecific(Ifloatarithmem(op, addr))) ->
- if i.arg.(0).loc <> Reg 100 then begin
+ if i.arg.(0) <> tos then
` fld {emit_reg i.arg.(0)}\n`;
- push_fp()
- end;
` {emit_string(instr_for_floatarithmem op)} REAL8 PTR {emit_addressing addr i.arg 1}\n`
| Lreloadretaddr ->
()
@@ -633,54 +571,45 @@ let emit_instr i =
` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`;
let b = name_for_cond_branch cmp in
` j{emit_string b} {emit_label lbl}\n`
+ | Ifloattest((Ceq | Cne as cmp), neg) ->
+ if i.arg.(1) <> tos then
+ ` fld {emit_reg i.arg.(1)}\n`;
+ if i.arg.(0) <> tos then
+ ` fld {emit_reg i.arg.(0)}\n`;
+ ` fucompp\n`;
+ ` fnstsw ax\n`;
+ let neg1 = if cmp = Ceq then neg else not neg in
+ if neg1 then begin (* branch if different *)
+ ` and ah, 68\n`;
+ ` xor ah, 64\n`;
+ ` jne {emit_label lbl}\n`
+ end else begin (* branch if equal *)
+ ` and ah, 69\n`;
+ ` cmp ah, 64\n`;
+ ` je {emit_label lbl}\n`
+ end
| Ifloattest(cmp, neg) ->
- let instr =
- match cmp with
- Ceq | Cne -> "fucom"
- | _ -> "fcom" in
let actual_cmp =
- match (i.arg.(0).loc, i.arg.(1).loc) with
- (Reg 100, Reg 100) -> (* both args on top of FP stack *)
- ` {emit_string instr}pp\n`;
- fp_offset := !fp_offset - 2;
- cmp
- | (Reg 100, _) -> (* first arg on top of FP stack *)
- ` {emit_string instr}p {emit_reg i.arg.(1)}\n`;
- pop_fp();
- cmp
- | (_, Reg 100) -> (* second arg on top of FP stack *)
- ` {emit_string instr}p {emit_reg i.arg.(0)}\n`;
- pop_fp();
- Cmm.swap_comparison cmp
- | (_, _) ->
- ` fld {emit_reg i.arg.(0)}\n`;
- push_fp();
- ` {emit_string instr}p {emit_reg i.arg.(1)}\n`;
- pop_fp();
- cmp in
+ if i.arg.(0) = tos && i.arg.(1) = tos then begin
+ (* both args on top of FP stack *)
+ ` fcompp\n`;
+ cmp
+ end else if i.arg.(0) = tos then begin
+ (* first arg on top of FP stack *)
+ ` fcomp {emit_reg i.arg.(1)}\n`;
+ cmp
+ end else if i.arg.(1) = tos then begin
+ (* second arg on top of FP stack *)
+ ` fcomp {emit_reg i.arg.(0)}\n`;
+ Cmm.swap_comparison cmp
+ end else begin
+ ` fld {emit_reg i.arg.(0)}\n`;
+ ` fcomp {emit_reg i.arg.(1)}\n`;
+ cmp
+ end in
` fnstsw ax\n`;
begin match actual_cmp with
- Ceq ->
- if neg then begin
- ` and ah, 68\n`;
- ` xor ah, 64\n`;
- ` jne `
- end else begin
- ` and ah, 69\n`;
- ` cmp ah, 64\n`;
- ` je `
- end
- | Cne ->
- if neg then begin
- ` and ah, 69\n`;
- ` cmp ah, 64\n`;
- ` je `
- end else begin
- ` and ah, 68\n`;
- ` xor ah, 64\n`;
- ` jne `
- end
- | Cle ->
+ Cle ->
` and ah, 69\n`;
` dec ah\n`;
` cmp ah, 64\n`;
@@ -703,6 +632,7 @@ let emit_instr i =
if neg
then ` jne `
else ` je `
+ | _ -> fatal_error "Emit_i386: floattest"
end;
`{emit_label lbl}\n`
| Ioddtest ->
diff --git a/asmcomp/proc_i386.ml b/asmcomp/proc_i386.ml
index ba280f9f4..b29a657a1 100644
--- a/asmcomp/proc_i386.ml
+++ b/asmcomp/proc_i386.ml
@@ -31,19 +31,13 @@ open Mach
edi 5
ebp 6
- f0 - f3 101-104 function arguments and results
- f0: C function results
- not preserved by C
-
- The other 4 floating-point registers are treated as a stack.
- We use the pseudo-register %tos (100) to represent the top of that stack. *)
+ tos 100 top of floating-point stack. *)
let int_reg_name =
[| "%eax"; "%ebx"; "%ecx"; "%edx"; "%esi"; "%edi"; "%ebp" |]
let float_reg_name =
- [| "%tos"; "%st(0)"; "%st(1)"; "%st(2)"; "%st(3)";
- "%st(4)"; "%st(5)"; "%st(6)"; "%st(7)" |]
+ [| "%tos" |]
let num_register_classes = 2
@@ -53,15 +47,15 @@ let register_class r =
| Addr -> 0
| Float -> 1
-let num_available_registers = [| 7; 4 |]
+let num_available_registers = [| 7; 0 |]
-let first_available_register = [| 0; 101 |]
+let first_available_register = [| 0; 100 |]
let register_name r =
if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
-(* There is little scheduling, and some operations are more efficient when
- %eax or %st(0) are arguments *)
+(* There is little scheduling, and some operations are more compact
+ when their argument is %eax. *)
let rotate_registers = false
@@ -72,10 +66,7 @@ let hard_int_reg =
for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done;
v
-let hard_float_reg =
- let v = Array.create 5 Reg.dummy in
- for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done;
- v
+let hard_float_reg = [| Reg.at_location Float (Reg 100) |]
let all_phys_regs =
Array.append hard_int_reg hard_float_reg
@@ -346,23 +337,23 @@ let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
let loc_arguments arg =
- calling_conventions 0 5 101 104 outgoing arg
+ calling_conventions 0 5 100 99 outgoing arg
let loc_parameters arg =
- let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc
+ let (loc, ofs) = calling_conventions 0 5 100 99 incoming arg in loc
let loc_results res =
- let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc
+ let (loc, ofs) = calling_conventions 0 5 100 100 not_supported res in loc
let extcall_use_push = true
let loc_external_arguments arg =
fatal_error "Proc.loc_external_arguments"
let loc_external_results res =
- let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc
+ let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
let loc_exn_bucket = eax
(* Registers destroyed by operations *)
let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *)
- Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104])
+ Array.of_list(List.map phys_reg [0;2;3])
let destroyed_at_oper = function
Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
@@ -382,13 +373,13 @@ let destroyed_at_raise = all_phys_regs
let safe_register_pressure op = 4
let max_register_pressure = function
- Iextcall(_, _) -> [| 4; 0 |]
- | Iintop(Idiv | Imod) -> [| 5; 4 |]
+ Iextcall(_, _) -> [| 4; max_int |]
+ | Iintop(Idiv | Imod) -> [| 5; max_int |]
| Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) |
- Iintoffloat -> [| 6; 4 |]
- | _ -> [|7; 4|]
+ Iintoffloat -> [| 6; max_int |]
+ | _ -> [|7; max_int |]
-(* Reloading of instruction arguments, storing of instruction results *)
+(* Reloading of instruction arguments, storing of instruction results. *)
let stackp r =
match r.loc with
@@ -403,6 +394,10 @@ let reload_test makereg tst arg =
else arg
| _ -> arg
+(* Since #floatregs = 0, pseudoregs of type float will never be reloaded.
+ Hence there is no need to make special cases for
+ floating-point operations. *)
+
let reload_operation makereg op arg res =
match op with
Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
@@ -411,21 +406,16 @@ let reload_operation makereg op arg res =
then ([|arg.(0); makereg arg.(1)|], res)
else (arg, res)
| Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat |
- Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ispecific(Ipush) ->
+ Ispecific(Ipush) ->
(* The argument(s) can be either in register or on stack *)
(arg, res)
- | Ispecific(Ifloatarithmem(_, _)) ->
- (* First arg can be either in register or on stack, but remaining
- arguments must be in registers *)
- let newarg = Array.create (Array.length arg) arg.(0) in
- for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done;
- (newarg, res)
| _ -> (* Other operations: all args and results in registers *)
raise Use_default
(* Scheduling is turned off because our model does not fit the 486
nor Pentium very well. In particular, it messes up with the
- float reg stack. *)
+ float reg stack. The Pentium Pro schedules at run-time much better
+ than what we could do. *)
let need_scheduling = false
diff --git a/asmcomp/proc_i386nt.ml b/asmcomp/proc_i386nt.ml
index 6037fe28d..0a787c9d5 100644
--- a/asmcomp/proc_i386nt.ml
+++ b/asmcomp/proc_i386nt.ml
@@ -31,19 +31,13 @@ open Mach
edi 5
ebp 6
- f0 - f3 101-104 function arguments and results
- f0: C function results
- not preserved by C
-
- The other 4 floating-point registers are treated as a stack.
- We use the pseudo-register %tos (100) to represent the top of that stack. *)
+ tos 100 top of floating-point stack. *)
let int_reg_name =
[| "eax"; "ebx"; "ecx"; "edx"; "esi"; "edi"; "ebp" |]
let float_reg_name =
- [| "tos"; "st(0)"; "st(1)"; "st(2)"; "st(3)";
- "st(4)"; "st(5)"; "st(6)"; "st(7)" |]
+ [| "tos" |]
let num_register_classes = 2
@@ -53,15 +47,15 @@ let register_class r =
| Addr -> 0
| Float -> 1
-let num_available_registers = [| 7; 4 |]
+let num_available_registers = [| 7; 0 |]
-let first_available_register = [| 0; 101 |]
+let first_available_register = [| 0; 100 |]
let register_name r =
if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
-(* There is little scheduling, and some operations are more efficient when
- eax or st(0) are arguments *)
+(* There is little scheduling, and some operations are more compact
+ when their argument is %eax. *)
let rotate_registers = false
@@ -72,10 +66,7 @@ let hard_int_reg =
for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done;
v
-let hard_float_reg =
- let v = Array.create 5 Reg.dummy in
- for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done;
- v
+let hard_float_reg = [| Reg.at_location Float (Reg 100) |]
let all_phys_regs =
Array.append hard_int_reg hard_float_reg
@@ -346,23 +337,23 @@ let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
let loc_arguments arg =
- calling_conventions 0 5 101 104 outgoing arg
+ calling_conventions 0 5 100 99 outgoing arg
let loc_parameters arg =
- let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc
+ let (loc, ofs) = calling_conventions 0 5 100 99 incoming arg in loc
let loc_results res =
- let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc
+ let (loc, ofs) = calling_conventions 0 5 100 100 not_supported res in loc
let extcall_use_push = true
let loc_external_arguments arg =
fatal_error "Proc.loc_external_arguments"
let loc_external_results res =
- let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc
+ let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
let loc_exn_bucket = eax
(* Registers destroyed by operations *)
let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *)
- Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104])
+ Array.of_list(List.map phys_reg [0;2;3])
let destroyed_at_oper = function
Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
@@ -382,11 +373,11 @@ let destroyed_at_raise = all_phys_regs
let safe_register_pressure op = 4
let max_register_pressure = function
- Iextcall(_, _) -> [| 4; 0 |]
- | Iintop(Idiv | Imod) -> [| 5; 4 |]
+ Iextcall(_, _) -> [| 4; max_int |]
+ | Iintop(Idiv | Imod) -> [| 5; max_int |]
| Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) |
- Iintoffloat -> [| 6; 4 |]
- | _ -> [|7; 4|]
+ Iintoffloat -> [| 6; max_int |]
+ | _ -> [|7; max_int |]
(* Reloading of instruction arguments, storing of instruction results *)
@@ -403,6 +394,10 @@ let reload_test makereg tst arg =
else arg
| _ -> arg
+(* Since #floatregs = 0, pseudoregs of type float will never be reloaded.
+ Hence there is no need to make special cases for
+ floating-point operations. *)
+
let reload_operation makereg op arg res =
match op with
Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
@@ -411,15 +406,9 @@ let reload_operation makereg op arg res =
then ([|arg.(0); makereg arg.(1)|], res)
else (arg, res)
| Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat |
- Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ispecific(Ipush) ->
+ Ispecific(Ipush) ->
(* The argument(s) can be either in register or on stack *)
(arg, res)
- | Ispecific(Ifloatarithmem(_, _)) ->
- (* First arg can be either in register or on stack, but remaining
- arguments must be in registers *)
- let newarg = Array.create (Array.length arg) arg.(0) in
- for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done;
- (newarg, res)
| _ -> (* Other operations: all args and results in registers *)
raise Use_default
diff --git a/asmcomp/reload.ml b/asmcomp/reload.ml
index 7c559b2dc..c57631d1d 100644
--- a/asmcomp/reload.ml
+++ b/asmcomp/reload.ml
@@ -33,7 +33,10 @@ let makereg r =
match r.loc with
Unknown -> fatal_error "Reload.makereg"
| Reg _ -> r
- | Stack _ -> redo_regalloc := true; Reg.clone r
+ | Stack _ ->
+ if Proc.num_available_registers.(Proc.register_class r) = 0
+ then r
+ else begin redo_regalloc := true; Reg.clone r end
let makeregs rv =
let n = Array.length rv in
diff --git a/asmrun/i386.S b/asmrun/i386.S
index 4902addde..7980afbce 100644
--- a/asmrun/i386.S
+++ b/asmrun/i386.S
@@ -138,16 +138,10 @@ L103: subl G(young_ptr), %eax /* eax = - size */
.align FUNCTION_ALIGN
G(caml_c_call):
/* Record lowest stack address and return address */
- /* In parallel, free the floating point registers */
- /* (Pairing is expected on the Pentium.) */
movl (%esp), %edx
- ffree %st(0)
movl %edx, G(caml_last_return_address)
- ffree %st(1)
leal 4(%esp), %edx
- ffree %st(2)
movl %edx, G(caml_bottom_of_stack)
- ffree %st(3)
/* Call the function (address in %eax) */
jmp *%eax
@@ -223,17 +217,11 @@ L107:
used by caml_c_call */
popl G(caml_bottom_of_stack)
popl G(caml_last_return_address)
- /* Restore callee-save registers.
- In parallel, free the floating-point registers
- that may have been used by Caml. */
+ /* Restore callee-save registers. */
popl %ebp
- ffree %st(0)
popl %edi
- ffree %st(1)
popl %esi
- ffree %st(2)
popl %ebx
- ffree %st(3)
/* Return to caller. */
ret
L108:
diff --git a/asmrun/i386nt.asm b/asmrun/i386nt.asm
index fbd3277e4..4eaa7ba15 100644
--- a/asmrun/i386nt.asm
+++ b/asmrun/i386nt.asm
@@ -48,6 +48,32 @@ _caml_exception_pointer DWORD 0
PUBLIC _caml_alloc
PUBLIC _caml_call_gc
+_caml_call_gc:
+ ; Record lowest stack address and return address
+ mov eax, [esp]
+ mov _caml_last_return_address, eax
+ lea eax, [esp+4]
+ mov _caml_bottom_of_stack, eax
+ ; Save all regs used by the code generator
+L105: mov _gc_entry_regs + 4, ebx
+ mov _gc_entry_regs + 8, ecx
+ mov _gc_entry_regs + 12, edx
+ mov _gc_entry_regs + 16, esi
+ mov _gc_entry_regs + 20, edi
+ mov _gc_entry_regs + 24, ebp
+ ; Call the garbage collector
+ call _garbage_collection
+ ; Restore all regs used by the code generator
+ mov ebx, _gc_entry_regs + 4
+ mov ecx, _gc_entry_regs + 8
+ mov edx, _gc_entry_regs + 12
+ mov esi, _gc_entry_regs + 16
+ mov edi, _gc_entry_regs + 20
+ mov ebp, _gc_entry_regs + 24
+ ; Return to caller
+ push _caml_last_return_address
+ ret
+
ALIGN 4
_caml_alloc1:
mov eax, _young_ptr
@@ -56,8 +82,12 @@ _caml_alloc1:
cmp eax, _young_limit
jb L100
ret
-L100: mov eax, 8
- jmp L105
+L100: mov eax, [esp]
+ mov _caml_last_return_address, eax
+ lea eax, [esp+4]
+ mov _caml_bottom_of_stack, eax
+ call L105
+ jmp _caml_alloc1
ALIGN 4
_caml_alloc2:
@@ -67,8 +97,12 @@ _caml_alloc2:
cmp eax, _young_limit
jb L101
ret
-L101: mov eax, 12
- jmp L105
+L101: mov eax, [esp]
+ mov _caml_last_return_address, eax
+ lea eax, [esp+4]
+ mov _caml_bottom_of_stack, eax
+ call L105
+ jmp _caml_alloc2
ALIGN 4
_caml_alloc3:
@@ -78,59 +112,31 @@ _caml_alloc3:
cmp eax, _young_limit
jb L102
ret
-L102: mov eax, 16
- jmp L105
+L102: mov eax, [esp]
+ mov _caml_last_return_address, eax
+ lea eax, [esp+4]
+ mov _caml_bottom_of_stack, eax
+ call L105
+ jmp _caml_alloc3
ALIGN 4
_caml_alloc:
- push eax
- mov eax, _young_ptr
- sub eax, [esp]
- mov _young_ptr, eax
- cmp eax, _young_limit
- jb L103
- add esp, 4
- ret
-L103: pop eax
- jmp L105
-
-_caml_call_gc:
- ; Adjust return address and recover desired size in eax
- pop eax
- add eax, 2
- push eax
- movzx eax, WORD PTR [eax-2]
-L105:
- ; Record lowest stack address and return address
- pop _caml_last_return_address
- mov _caml_bottom_of_stack, esp
- ; Save all regs used by the code generator
- mov _gc_entry_regs + 4, ebx
- mov _gc_entry_regs + 8, ecx
- mov _gc_entry_regs + 12, edx
- mov _gc_entry_regs + 16, esi
- mov _gc_entry_regs + 20, edi
- mov _gc_entry_regs + 24, ebp
- ; Save desired size
- push eax
- ; Call the garbage collector
- call _garbage_collection
- ; Restore all regs used by the code generator
- mov ebx, _gc_entry_regs + 4
- mov ecx, _gc_entry_regs + 8
- mov edx, _gc_entry_regs + 12
- mov esi, _gc_entry_regs + 16
- mov edi, _gc_entry_regs + 20
- mov ebp, _gc_entry_regs + 24
- ; Recover desired size
- pop eax
- ; Decrement young_ptr by desired size
- sub _young_ptr, eax
- ; Reload result of allocation in %eax
- mov eax, _young_ptr
- ; Return to caller
- push _caml_last_return_address
- ret
+ sub eax, _young_ptr ; eax = size - young_ptr
+ neg eax ; eax = young_ptr - size
+ cmp eax, _young_limit
+ jb L103
+ mov _young_ptr, eax
+ ret
+L103: sub eax, _young_ptr ; eax = - size
+ neg eax ; eax = size
+ push eax ; save desired size
+ mov eax, [esp+4]
+ mov _caml_last_return_address, eax
+ lea eax, [esp+8]
+ mov _caml_bottom_of_stack, eax
+ call L105
+ pop eax ; recover desired size
+ jmp _caml_alloc
; Call a C function from Caml
@@ -138,16 +144,10 @@ L105:
ALIGN 4
_caml_c_call:
; Record lowest stack address and return address
- ; In parallel, free the floating point registers
- ; (Pairing is expected on the Pentium.)
mov edx, [esp]
- ffree st(0)
mov _caml_last_return_address, edx
- ffree st(1)
lea edx, [esp+4]
- ffree st(2)
mov _caml_bottom_of_stack, edx
- ffree st(3)
; Call the function (address in %eax)
jmp eax
@@ -224,16 +224,10 @@ L107:
pop _caml_bottom_of_stack
pop _caml_last_return_address
; Restore callee-save registers.
- ; In parallel, free the floating-point registers
- ; that may have been used by Caml.
pop ebp
- ffree st(0)
pop edi
- ffree st(1)
pop esi
- ffree st(2)
pop ebx
- ffree st(3)
; Return to caller.
ret
L108: