1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
|
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 1998 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
(* reserved. This file is distributed under the terms of the Q *)
(* Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Instruction selection for the ARM processor *)
open Arch
open Proc
open Cmm
open Mach
let is_offset chunk n =
match chunk with
(* VFPv{2,3} load/store have -1020 to 1020 *)
Single | Double | Double_u
when !fpu >= VFPv2 ->
n >= -1020 && n <= 1020
(* ARM load/store byte/word have -4095 to 4095 *)
| Byte_unsigned | Byte_signed
| Thirtytwo_unsigned | Thirtytwo_signed
| Word | Single
when not !thumb ->
n >= -4095 && n <= 4095
(* Thumb-2 load/store have -255 to 4095 *)
| _ when !arch > ARMv6 && !thumb ->
n >= -255 && n <= 4095
(* Everything else has -255 to 255 *)
| _ ->
n >= -255 && n <= 255
let select_shiftop = function
Clsl -> Ishiftlogicalleft
| Clsr -> Ishiftlogicalright
| Casr -> Ishiftarithmeticright
| __-> assert false
(* Special constraints on operand and result registers *)
exception Use_default
let r1 = phys_reg 1
let r12 = phys_reg 8
let pseudoregs_for_operation op arg res =
match op with
(* For mul rd,rm,rs and mla rd,rm,rs,ra (pre-ARMv6) the registers rm
and rd must be different. We deal with this by pretending that rm
is also a result of the mul / mla operation. *)
Iintop Imul | Ispecific Imuladd when !arch < ARMv6 ->
(arg, [| res.(0); arg.(0) |])
(* For smull rdlo,rdhi,rn,rm (pre-ARMv6) the registers rdlo, rdhi and rn
must be different. We deal with this by pretending that rn is also a
result of the smull operation. *)
| Iintop Imulh when !arch < ARMv6 ->
(arg, [| res.(0); arg.(0) |])
(* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
| Iabsf | Inegf when !fpu = Soft ->
([|res.(0); arg.(1)|], res)
(* VFPv{2,3} Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
| Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) ->
let arg' = Array.copy arg in
arg'.(0) <- res.(0);
(arg', res)
(* We use __aeabi_idivmod for Cmodi only, and hence we care only
for the remainder in r1, so fix up the destination register. *)
| Iextcall("__aeabi_idivmod", false) ->
(arg, [|r1|])
(* Other instructions are regular *)
| _ -> raise Use_default
(* Instruction selection *)
class selector = object(self)
inherit Selectgen.selector_generic as super
method! regs_for tyv =
Reg.createv (if !fpu = Soft then begin
(* Expand floats into pairs of integer registers *)
let rec expand = function
[] -> []
| Float :: tyl -> Int :: Int :: expand tyl
| ty :: tyl -> ty :: expand tyl in
Array.of_list (expand (Array.to_list tyv))
end else begin
tyv
end)
method is_immediate n =
is_immediate (Int32.of_int n)
method! is_simple_expr = function
(* inlined floating-point ops are simple if their arguments are *)
| Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv2 ->
List.for_all self#is_simple_expr args
(* inlined byte-swap ops are simple if their arguments are *)
| Cop(Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
List.for_all self#is_simple_expr args
| Cop(Cextcall("caml_int32_direct_bswap", _,_,_), args) when !arch >= ARMv6 ->
List.for_all self#is_simple_expr args
| e -> super#is_simple_expr e
method select_addressing chunk = function
| Cop(Cadda, [arg; Cconst_int n])
when is_offset chunk n ->
(Iindexed n, arg)
| Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])])
when is_offset chunk n ->
(Iindexed n, Cop(Cadda, [arg1; arg2]))
| arg ->
(Iindexed 0, arg)
method select_shift_arith op arithop arithrevop args =
match args with
[arg1; Cop(Clsl | Clsr | Casr as op, [arg2; Cconst_int n])]
when n > 0 && n < 32 ->
(Ispecific(Ishiftarith(arithop, select_shiftop op, n)), [arg1; arg2])
| [Cop(Clsl | Clsr | Casr as op, [arg1; Cconst_int n]); arg2]
when n > 0 && n < 32 ->
(Ispecific(Ishiftarith(arithrevop, select_shiftop op, n)), [arg2; arg1])
| args ->
begin match super#select_operation op args with
(* Recognize multiply high and add *)
(Iintop Iadd, [Cop(Cmulhi, args); arg3])
| (Iintop Iadd, [arg3; Cop(Cmulhi, args)]) as op_args
when !arch >= ARMv6 ->
begin match self#select_operation Cmulhi args with
(Iintop Imulh, [arg1; arg2]) ->
(Ispecific Imulhadd, [arg1; arg2; arg3])
| _ -> op_args
end
(* Recognize multiply and add *)
| (Iintop Iadd, [Cop(Cmuli, args); arg3])
| (Iintop Iadd, [arg3; Cop(Cmuli, args)]) as op_args ->
begin match self#select_operation Cmuli args with
(Iintop Imul, [arg1; arg2]) ->
(Ispecific Imuladd, [arg1; arg2; arg3])
| _ -> op_args
end
(* Recognize multiply and subtract *)
| (Iintop Isub, [arg3; Cop(Cmuli, args)]) as op_args
when !arch > ARMv6 ->
begin match self#select_operation Cmuli args with
(Iintop Imul, [arg1; arg2]) ->
(Ispecific Imulsub, [arg1; arg2; arg3])
| _ -> op_args
end
| op_args -> op_args
end
method! select_operation op args =
match (op, args) with
(* Recognize special shift arithmetic *)
((Cadda | Caddi), [arg; Cconst_int n])
when n < 0 && self#is_immediate (-n) ->
(Iintop_imm(Isub, -n), [arg])
| ((Cadda | Caddi as op), args) ->
self#select_shift_arith op Ishiftadd Ishiftadd args
| ((Csuba | Csubi), [arg; Cconst_int n])
when n < 0 && self#is_immediate (-n) ->
(Iintop_imm(Iadd, -n), [arg])
| ((Csuba | Csubi), [Cconst_int n; arg])
when self#is_immediate n ->
(Ispecific(Irevsubimm n), [arg])
| ((Csuba | Csubi as op), args) ->
self#select_shift_arith op Ishiftsub Ishiftsubrev args
| (Cand as op, args) ->
self#select_shift_arith op Ishiftand Ishiftand args
| (Cor as op, args) ->
self#select_shift_arith op Ishiftor Ishiftor args
| (Cxor as op, args) ->
self#select_shift_arith op Ishiftxor Ishiftxor args
| (Ccheckbound _, [Cop(Clsl | Clsr | Casr as op, [arg1; Cconst_int n]); arg2])
when n > 0 && n < 32 ->
(Ispecific(Ishiftcheckbound(select_shiftop op, n)), [arg1; arg2])
(* ARM does not support immediate operands for multiplication *)
| (Cmuli, args) ->
(Iintop Imul, args)
| (Cmulhi, args) ->
(Iintop Imulh, args)
(* Turn integer division/modulus into runtime ABI calls *)
| (Cdivi, args) ->
(Iextcall("__aeabi_idiv", false), args)
| (Cmodi, args) ->
(* See above for fix up of return register *)
(Iextcall("__aeabi_idivmod", false), args)
(* Recognize 16-bit bswap instruction (ARMv6T2 because we need movt) *)
| (Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
(Ispecific(Ibswap 16), args)
(* Recognize 32-bit bswap instructions (ARMv6 and above) *)
| (Cextcall("caml_int32_direct_bswap", _, _, _), args) when !arch >= ARMv6 ->
(Ispecific(Ibswap 32), args)
(* Turn floating-point operations into runtime ABI calls for softfp *)
| (op, args) when !fpu = Soft -> self#select_operation_softfp op args
(* Select operations for VFPv{2,3} *)
| (op, args) -> self#select_operation_vfpv3 op args
method private select_operation_softfp op args =
match (op, args) with
(* Turn floating-point operations into runtime ABI calls *)
| (Caddf, args) -> (Iextcall("__aeabi_dadd", false), args)
| (Csubf, args) -> (Iextcall("__aeabi_dsub", false), args)
| (Cmulf, args) -> (Iextcall("__aeabi_dmul", false), args)
| (Cdivf, args) -> (Iextcall("__aeabi_ddiv", false), args)
| (Cfloatofint, args) -> (Iextcall("__aeabi_i2d", false), args)
| (Cintoffloat, args) -> (Iextcall("__aeabi_d2iz", false), args)
| (Ccmpf comp, args) ->
let func = (match comp with
Cne (* there's no __aeabi_dcmpne *)
| Ceq -> "__aeabi_dcmpeq"
| Clt -> "__aeabi_dcmplt"
| Cle -> "__aeabi_dcmple"
| Cgt -> "__aeabi_dcmpgt"
| Cge -> "__aeabi_dcmpge") in
let comp = (match comp with
Cne -> Ceq (* eq 0 => false *)
| _ -> Cne (* ne 0 => true *)) in
(Iintop_imm(Icomp(Iunsigned comp), 0),
[Cop(Cextcall(func, typ_int, false, Debuginfo.none), args)])
(* Add coercions around loads and stores of 32-bit floats *)
| (Cload Single, args) ->
(Iextcall("__aeabi_f2d", false), [Cop(Cload Word, args)])
| (Cstore Single, [arg1; arg2]) ->
let arg2' =
Cop(Cextcall("__aeabi_d2f", typ_int, false, Debuginfo.none),
[arg2]) in
self#select_operation (Cstore Word) [arg1; arg2']
(* Other operations are regular *)
| (op, args) -> super#select_operation op args
method private select_operation_vfpv3 op args =
match (op, args) with
(* Recognize floating-point negate and multiply *)
(Cnegf, [Cop(Cmulf, args)]) ->
(Ispecific Inegmulf, args)
(* Recognize floating-point multiply and add *)
| (Caddf, [arg; Cop(Cmulf, args)])
| (Caddf, [Cop(Cmulf, args); arg]) ->
(Ispecific Imuladdf, arg :: args)
(* Recognize floating-point negate, multiply and subtract *)
| (Csubf, [Cop(Cnegf, [arg]); Cop(Cmulf, args)])
| (Csubf, [Cop(Cnegf, [Cop(Cmulf, args)]); arg]) ->
(Ispecific Inegmulsubf, arg :: args)
(* Recognize floating-point negate, multiply and add *)
| (Csubf, [arg; Cop(Cmulf, args)]) ->
(Ispecific Inegmuladdf, arg :: args)
(* Recognize multiply and subtract *)
| (Csubf, [Cop(Cmulf, args); arg]) ->
(Ispecific Imulsubf, arg :: args)
(* Recognize floating-point square root *)
| (Cextcall("sqrt", _, false, _), args) ->
(Ispecific Isqrtf, args)
(* Other operations are regular *)
| (op, args) -> super#select_operation op args
method! select_condition = function
(* Turn floating-point comparisons into runtime ABI calls *)
Cop(Ccmpf _ as op, args) when !fpu = Soft ->
begin match self#select_operation_softfp op args with
(Iintop_imm(Icomp(Iunsigned Ceq), 0), [arg]) -> (Ifalsetest, arg)
| (Iintop_imm(Icomp(Iunsigned Cne), 0), [arg]) -> (Itruetest, arg)
| _ -> assert false
end
| expr ->
super#select_condition expr
(* Deal with some register constraints *)
method! insert_op_debug op dbg rs rd =
try
let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
self#insert_moves rs rsrc;
self#insert_debug (Iop op) dbg rsrc rdst;
self#insert_moves rdst rd;
rd
with Use_default ->
super#insert_op_debug op dbg rs rd
end
let fundecl f = (new selector)#emit_fundecl f
|