summaryrefslogtreecommitdiffstats
path: root/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/.depend171
-rwxr-xr-xstdlib/Compflags7
-rw-r--r--stdlib/Makefile21
-rw-r--r--stdlib/Makefile.nt2
-rwxr-xr-xstdlib/Makefile.shared25
-rw-r--r--stdlib/StdlibModules4
-rw-r--r--stdlib/arg.ml8
-rw-r--r--stdlib/arg.mli2
-rw-r--r--stdlib/array.mli2
-rw-r--r--stdlib/arrayLabels.mli6
-rw-r--r--stdlib/buffer.ml46
-rw-r--r--stdlib/buffer.mli41
-rw-r--r--stdlib/bytes.ml253
-rw-r--r--stdlib/bytes.mli398
-rw-r--r--stdlib/bytesLabels.ml16
-rw-r--r--stdlib/bytesLabels.mli213
-rw-r--r--stdlib/camlinternalFormat.ml2644
-rw-r--r--stdlib/camlinternalFormat.mli104
-rw-r--r--stdlib/camlinternalFormatBasics.ml614
-rw-r--r--stdlib/camlinternalFormatBasics.mli287
-rw-r--r--stdlib/camlinternalOO.ml6
-rw-r--r--stdlib/digest.ml28
-rw-r--r--stdlib/digest.mli10
-rw-r--r--stdlib/filename.mli7
-rw-r--r--stdlib/format.ml494
-rw-r--r--stdlib/format.mli12
-rw-r--r--stdlib/gc.mli9
-rw-r--r--stdlib/genlex.ml16
-rw-r--r--stdlib/hashtbl.mli28
-rw-r--r--stdlib/header.c2
-rw-r--r--stdlib/lazy.mli6
-rw-r--r--stdlib/lexing.ml47
-rw-r--r--stdlib/lexing.mli10
-rw-r--r--stdlib/list.mli24
-rw-r--r--stdlib/listLabels.mli24
-rw-r--r--stdlib/map.mli20
-rw-r--r--stdlib/marshal.ml35
-rw-r--r--stdlib/marshal.mli98
-rw-r--r--stdlib/nativeint.mli4
-rw-r--r--stdlib/obj.ml34
-rw-r--r--stdlib/obj.mli12
-rw-r--r--stdlib/parsing.ml16
-rw-r--r--stdlib/pervasives.ml125
-rw-r--r--stdlib/pervasives.mli97
-rw-r--r--stdlib/printexc.ml161
-rw-r--r--stdlib/printexc.mli163
-rw-r--r--stdlib/printf.ml751
-rw-r--r--stdlib/printf.mli73
-rw-r--r--stdlib/queue.mli2
-rw-r--r--stdlib/scanf.ml1042
-rw-r--r--stdlib/scanf.mli10
-rw-r--r--stdlib/set.ml3
-rw-r--r--stdlib/set.mli21
-rw-r--r--stdlib/sort.mli3
-rw-r--r--stdlib/stdLabels.ml2
-rw-r--r--stdlib/stdLabels.mli6
-rw-r--r--stdlib/stream.ml19
-rw-r--r--stdlib/stream.mli3
-rw-r--r--stdlib/string.ml223
-rw-r--r--stdlib/string.mli165
-rw-r--r--stdlib/stringLabels.mli145
-rw-r--r--stdlib/sys.mli9
-rw-r--r--stdlib/weak.ml4
63 files changed, 6363 insertions, 2470 deletions
diff --git a/stdlib/.depend b/stdlib/.depend
index e5b65ad51..96f95082d 100644
--- a/stdlib/.depend
+++ b/stdlib/.depend
@@ -2,7 +2,11 @@ arg.cmi :
array.cmi :
arrayLabels.cmi :
buffer.cmi :
+bytes.cmi :
+bytesLabels.cmi :
callback.cmi :
+camlinternalFormat.cmi : camlinternalFormatBasics.cmi buffer.cmi
+camlinternalFormatBasics.cmi :
camlinternalLazy.cmi :
camlinternalMod.cmi : obj.cmi
camlinternalOO.cmi : obj.cmi
@@ -27,16 +31,17 @@ nativeint.cmi :
obj.cmi : int32.cmi
oo.cmi : camlinternalOO.cmi
parsing.cmi : obj.cmi lexing.cmi
-pervasives.cmi :
+pervasives.cmi : camlinternalFormatBasics.cmi
printexc.cmi :
-printf.cmi : obj.cmi buffer.cmi
+printf.cmi : buffer.cmi
queue.cmi :
random.cmi : nativeint.cmi int64.cmi int32.cmi
scanf.cmi : pervasives.cmi
set.cmi :
sort.cmi :
stack.cmi :
-stdLabels.cmi : stringLabels.cmi listLabels.cmi arrayLabels.cmi
+stdLabels.cmi : stringLabels.cmi listLabels.cmi bytesLabels.cmi \
+ arrayLabels.cmi
stream.cmi :
string.cmi :
stringLabels.cmi :
@@ -50,10 +55,20 @@ array.cmo : array.cmi
array.cmx : array.cmi
arrayLabels.cmo : array.cmi arrayLabels.cmi
arrayLabels.cmx : array.cmx arrayLabels.cmi
-buffer.cmo : sys.cmi string.cmi buffer.cmi
-buffer.cmx : sys.cmx string.cmx buffer.cmi
+buffer.cmo : sys.cmi string.cmi bytes.cmi buffer.cmi
+buffer.cmx : sys.cmx string.cmx bytes.cmx buffer.cmi
+bytes.cmo : pervasives.cmi list.cmi char.cmi bytes.cmi
+bytes.cmx : pervasives.cmx list.cmx char.cmx bytes.cmi
+bytesLabels.cmo : bytes.cmi bytesLabels.cmi
+bytesLabels.cmx : bytes.cmx bytesLabels.cmi
callback.cmo : obj.cmi callback.cmi
callback.cmx : obj.cmx callback.cmi
+camlinternalFormat.cmo : sys.cmi string.cmi char.cmi \
+ camlinternalFormatBasics.cmi bytes.cmi buffer.cmi camlinternalFormat.cmi
+camlinternalFormat.cmx : sys.cmx string.cmx char.cmx \
+ camlinternalFormatBasics.cmx bytes.cmx buffer.cmx camlinternalFormat.cmi
+camlinternalFormatBasics.cmo : camlinternalFormatBasics.cmi
+camlinternalFormatBasics.cmx : camlinternalFormatBasics.cmi
camlinternalLazy.cmo : obj.cmi camlinternalLazy.cmi
camlinternalLazy.cmx : obj.cmx camlinternalLazy.cmi
camlinternalMod.cmo : obj.cmi camlinternalOO.cmi array.cmi \
@@ -68,20 +83,22 @@ char.cmo : char.cmi
char.cmx : char.cmi
complex.cmo : complex.cmi
complex.cmx : complex.cmi
-digest.cmo : string.cmi char.cmi digest.cmi
-digest.cmx : string.cmx char.cmx digest.cmi
+digest.cmo : string.cmi char.cmi bytes.cmi digest.cmi
+digest.cmx : string.cmx char.cmx bytes.cmx digest.cmi
filename.cmo : sys.cmi string.cmi random.cmi printf.cmi lazy.cmi buffer.cmi \
filename.cmi
filename.cmx : sys.cmx string.cmx random.cmx printf.cmx lazy.cmx buffer.cmx \
filename.cmi
-format.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \
- buffer.cmi format.cmi
-format.cmx : string.cmx printf.cmx pervasives.cmx obj.cmx list.cmx \
- buffer.cmx format.cmi
+format.cmo : string.cmi pervasives.cmi camlinternalFormatBasics.cmi \
+ camlinternalFormat.cmi buffer.cmi format.cmi
+format.cmx : string.cmx pervasives.cmx camlinternalFormatBasics.cmx \
+ camlinternalFormat.cmx buffer.cmx format.cmi
gc.cmo : sys.cmi printf.cmi gc.cmi
gc.cmx : sys.cmx printf.cmx gc.cmi
-genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi genlex.cmi
-genlex.cmx : string.cmx stream.cmx list.cmx hashtbl.cmx char.cmx genlex.cmi
+genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi bytes.cmi \
+ genlex.cmi
+genlex.cmx : string.cmx stream.cmx list.cmx hashtbl.cmx char.cmx bytes.cmx \
+ genlex.cmi
hashtbl.cmo : sys.cmi string.cmi random.cmi obj.cmi lazy.cmi array.cmi \
hashtbl.cmi
hashtbl.cmx : sys.cmx string.cmx random.cmx obj.cmx lazy.cmx array.cmx \
@@ -92,16 +109,16 @@ int64.cmo : pervasives.cmi int64.cmi
int64.cmx : pervasives.cmx int64.cmi
lazy.cmo : obj.cmi camlinternalLazy.cmi lazy.cmi
lazy.cmx : obj.cmx camlinternalLazy.cmx lazy.cmi
-lexing.cmo : sys.cmi string.cmi array.cmi lexing.cmi
-lexing.cmx : sys.cmx string.cmx array.cmx lexing.cmi
+lexing.cmo : sys.cmi string.cmi bytes.cmi array.cmi lexing.cmi
+lexing.cmx : sys.cmx string.cmx bytes.cmx array.cmx lexing.cmi
list.cmo : list.cmi
list.cmx : list.cmi
listLabels.cmo : list.cmi listLabels.cmi
listLabels.cmx : list.cmx listLabels.cmi
map.cmo : map.cmi
map.cmx : map.cmi
-marshal.cmo : string.cmi marshal.cmi
-marshal.cmx : string.cmx marshal.cmi
+marshal.cmo : bytes.cmi marshal.cmi
+marshal.cmx : bytes.cmx marshal.cmi
moreLabels.cmo : set.cmi map.cmi hashtbl.cmi moreLabels.cmi
moreLabels.cmx : set.cmx map.cmx hashtbl.cmx moreLabels.cmi
nativeint.cmo : sys.cmi pervasives.cmi nativeint.cmi
@@ -112,13 +129,15 @@ oo.cmo : camlinternalOO.cmi oo.cmi
oo.cmx : camlinternalOO.cmx oo.cmi
parsing.cmo : obj.cmi lexing.cmi array.cmi parsing.cmi
parsing.cmx : obj.cmx lexing.cmx array.cmx parsing.cmi
-pervasives.cmo : pervasives.cmi
-pervasives.cmx : pervasives.cmi
-printexc.cmo : printf.cmi obj.cmi buffer.cmi array.cmi printexc.cmi
-printexc.cmx : printf.cmx obj.cmx buffer.cmx array.cmx printexc.cmi
-printf.cmo : string.cmi obj.cmi list.cmi char.cmi buffer.cmi array.cmi \
+pervasives.cmo : camlinternalFormatBasics.cmi pervasives.cmi
+pervasives.cmx : camlinternalFormatBasics.cmx pervasives.cmi
+printexc.cmo : printf.cmi pervasives.cmi obj.cmi buffer.cmi array.cmi \
+ printexc.cmi
+printexc.cmx : printf.cmx pervasives.cmx obj.cmx buffer.cmx array.cmx \
+ printexc.cmi
+printf.cmo : camlinternalFormatBasics.cmi camlinternalFormat.cmi buffer.cmi \
printf.cmi
-printf.cmx : string.cmx obj.cmx list.cmx char.cmx buffer.cmx array.cmx \
+printf.cmx : camlinternalFormatBasics.cmx camlinternalFormat.cmx buffer.cmx \
printf.cmi
queue.cmo : obj.cmi queue.cmi
queue.cmx : obj.cmx queue.cmi
@@ -126,26 +145,28 @@ random.cmo : string.cmi pervasives.cmi nativeint.cmi int64.cmi int32.cmi \
digest.cmi char.cmi array.cmi random.cmi
random.cmx : string.cmx pervasives.cmx nativeint.cmx int64.cmx int32.cmx \
digest.cmx char.cmx array.cmx random.cmi
-scanf.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \
- hashtbl.cmi buffer.cmi array.cmi scanf.cmi
-scanf.cmx : string.cmx printf.cmx pervasives.cmx obj.cmx list.cmx \
- hashtbl.cmx buffer.cmx array.cmx scanf.cmi
+scanf.cmo : string.cmi printf.cmi pervasives.cmi list.cmi \
+ camlinternalFormatBasics.cmi camlinternalFormat.cmi bytes.cmi buffer.cmi \
+ scanf.cmi
+scanf.cmx : string.cmx printf.cmx pervasives.cmx list.cmx \
+ camlinternalFormatBasics.cmx camlinternalFormat.cmx bytes.cmx buffer.cmx \
+ scanf.cmi
set.cmo : list.cmi set.cmi
set.cmx : list.cmx set.cmi
sort.cmo : array.cmi sort.cmi
sort.cmx : array.cmx sort.cmi
stack.cmo : list.cmi stack.cmi
stack.cmx : list.cmx stack.cmi
-stdLabels.cmo : stringLabels.cmi listLabels.cmi arrayLabels.cmi \
- stdLabels.cmi
-stdLabels.cmx : stringLabels.cmx listLabels.cmx arrayLabels.cmx \
- stdLabels.cmi
+stdLabels.cmo : stringLabels.cmi listLabels.cmi bytesLabels.cmi \
+ arrayLabels.cmi stdLabels.cmi
+stdLabels.cmx : stringLabels.cmx listLabels.cmx bytesLabels.cmx \
+ arrayLabels.cmx stdLabels.cmi
std_exit.cmo :
std_exit.cmx :
-stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi stream.cmi
-stream.cmx : string.cmx obj.cmx list.cmx lazy.cmx stream.cmi
-string.cmo : pervasives.cmi list.cmi char.cmi string.cmi
-string.cmx : pervasives.cmx list.cmx char.cmx string.cmi
+stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi bytes.cmi stream.cmi
+stream.cmx : string.cmx obj.cmx list.cmx lazy.cmx bytes.cmx stream.cmi
+string.cmo : pervasives.cmi list.cmi bytes.cmi string.cmi
+string.cmx : pervasives.cmx list.cmx bytes.cmx string.cmi
stringLabels.cmo : string.cmi stringLabels.cmi
stringLabels.cmx : string.cmx stringLabels.cmi
sys.cmo : sys.cmi
@@ -160,10 +181,20 @@ array.cmo : array.cmi
array.p.cmx : array.cmi
arrayLabels.cmo : array.cmi arrayLabels.cmi
arrayLabels.p.cmx : array.p.cmx arrayLabels.cmi
-buffer.cmo : sys.cmi string.cmi buffer.cmi
-buffer.p.cmx : sys.p.cmx string.p.cmx buffer.cmi
+buffer.cmo : sys.cmi string.cmi bytes.cmi buffer.cmi
+buffer.p.cmx : sys.p.cmx string.p.cmx bytes.p.cmx buffer.cmi
+bytes.cmo : pervasives.cmi list.cmi char.cmi bytes.cmi
+bytes.p.cmx : pervasives.p.cmx list.p.cmx char.p.cmx bytes.cmi
+bytesLabels.cmo : bytes.cmi bytesLabels.cmi
+bytesLabels.p.cmx : bytes.p.cmx bytesLabels.cmi
callback.cmo : obj.cmi callback.cmi
callback.p.cmx : obj.p.cmx callback.cmi
+camlinternalFormat.cmo : sys.cmi string.cmi char.cmi \
+ camlinternalFormatBasics.cmi bytes.cmi buffer.cmi camlinternalFormat.cmi
+camlinternalFormat.p.cmx : sys.p.cmx string.p.cmx char.p.cmx \
+ camlinternalFormatBasics.p.cmx bytes.p.cmx buffer.p.cmx camlinternalFormat.cmi
+camlinternalFormatBasics.cmo : camlinternalFormatBasics.cmi
+camlinternalFormatBasics.p.cmx : camlinternalFormatBasics.cmi
camlinternalLazy.cmo : obj.cmi camlinternalLazy.cmi
camlinternalLazy.p.cmx : obj.p.cmx camlinternalLazy.cmi
camlinternalMod.cmo : obj.cmi camlinternalOO.cmi array.cmi \
@@ -178,20 +209,22 @@ char.cmo : char.cmi
char.p.cmx : char.cmi
complex.cmo : complex.cmi
complex.p.cmx : complex.cmi
-digest.cmo : string.cmi char.cmi digest.cmi
-digest.p.cmx : string.p.cmx char.p.cmx digest.cmi
+digest.cmo : string.cmi char.cmi bytes.cmi digest.cmi
+digest.p.cmx : string.p.cmx char.p.cmx bytes.p.cmx digest.cmi
filename.cmo : sys.cmi string.cmi random.cmi printf.cmi lazy.cmi buffer.cmi \
filename.cmi
filename.p.cmx : sys.p.cmx string.p.cmx random.p.cmx printf.p.cmx lazy.p.cmx buffer.p.cmx \
filename.cmi
-format.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \
- buffer.cmi format.cmi
-format.p.cmx : string.p.cmx printf.p.cmx pervasives.p.cmx obj.p.cmx list.p.cmx \
- buffer.p.cmx format.cmi
+format.cmo : string.cmi pervasives.cmi camlinternalFormatBasics.cmi \
+ camlinternalFormat.cmi buffer.cmi format.cmi
+format.p.cmx : string.p.cmx pervasives.p.cmx camlinternalFormatBasics.p.cmx \
+ camlinternalFormat.p.cmx buffer.p.cmx format.cmi
gc.cmo : sys.cmi printf.cmi gc.cmi
gc.p.cmx : sys.p.cmx printf.p.cmx gc.cmi
-genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi genlex.cmi
-genlex.p.cmx : string.p.cmx stream.p.cmx list.p.cmx hashtbl.p.cmx char.p.cmx genlex.cmi
+genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi bytes.cmi \
+ genlex.cmi
+genlex.p.cmx : string.p.cmx stream.p.cmx list.p.cmx hashtbl.p.cmx char.p.cmx bytes.p.cmx \
+ genlex.cmi
hashtbl.cmo : sys.cmi string.cmi random.cmi obj.cmi lazy.cmi array.cmi \
hashtbl.cmi
hashtbl.p.cmx : sys.p.cmx string.p.cmx random.p.cmx obj.p.cmx lazy.p.cmx array.p.cmx \
@@ -202,16 +235,16 @@ int64.cmo : pervasives.cmi int64.cmi
int64.p.cmx : pervasives.p.cmx int64.cmi
lazy.cmo : obj.cmi camlinternalLazy.cmi lazy.cmi
lazy.p.cmx : obj.p.cmx camlinternalLazy.p.cmx lazy.cmi
-lexing.cmo : sys.cmi string.cmi array.cmi lexing.cmi
-lexing.p.cmx : sys.p.cmx string.p.cmx array.p.cmx lexing.cmi
+lexing.cmo : sys.cmi string.cmi bytes.cmi array.cmi lexing.cmi
+lexing.p.cmx : sys.p.cmx string.p.cmx bytes.p.cmx array.p.cmx lexing.cmi
list.cmo : list.cmi
list.p.cmx : list.cmi
listLabels.cmo : list.cmi listLabels.cmi
listLabels.p.cmx : list.p.cmx listLabels.cmi
map.cmo : map.cmi
map.p.cmx : map.cmi
-marshal.cmo : string.cmi marshal.cmi
-marshal.p.cmx : string.p.cmx marshal.cmi
+marshal.cmo : bytes.cmi marshal.cmi
+marshal.p.cmx : bytes.p.cmx marshal.cmi
moreLabels.cmo : set.cmi map.cmi hashtbl.cmi moreLabels.cmi
moreLabels.p.cmx : set.p.cmx map.p.cmx hashtbl.p.cmx moreLabels.cmi
nativeint.cmo : sys.cmi pervasives.cmi nativeint.cmi
@@ -222,13 +255,15 @@ oo.cmo : camlinternalOO.cmi oo.cmi
oo.p.cmx : camlinternalOO.p.cmx oo.cmi
parsing.cmo : obj.cmi lexing.cmi array.cmi parsing.cmi
parsing.p.cmx : obj.p.cmx lexing.p.cmx array.p.cmx parsing.cmi
-pervasives.cmo : pervasives.cmi
-pervasives.p.cmx : pervasives.cmi
-printexc.cmo : printf.cmi obj.cmi buffer.cmi array.cmi printexc.cmi
-printexc.p.cmx : printf.p.cmx obj.p.cmx buffer.p.cmx array.p.cmx printexc.cmi
-printf.cmo : string.cmi obj.cmi list.cmi char.cmi buffer.cmi array.cmi \
+pervasives.cmo : camlinternalFormatBasics.cmi pervasives.cmi
+pervasives.p.cmx : camlinternalFormatBasics.p.cmx pervasives.cmi
+printexc.cmo : printf.cmi pervasives.cmi obj.cmi buffer.cmi array.cmi \
+ printexc.cmi
+printexc.p.cmx : printf.p.cmx pervasives.p.cmx obj.p.cmx buffer.p.cmx array.p.cmx \
+ printexc.cmi
+printf.cmo : camlinternalFormatBasics.cmi camlinternalFormat.cmi buffer.cmi \
printf.cmi
-printf.p.cmx : string.p.cmx obj.p.cmx list.p.cmx char.p.cmx buffer.p.cmx array.p.cmx \
+printf.p.cmx : camlinternalFormatBasics.p.cmx camlinternalFormat.p.cmx buffer.p.cmx \
printf.cmi
queue.cmo : obj.cmi queue.cmi
queue.p.cmx : obj.p.cmx queue.cmi
@@ -236,26 +271,28 @@ random.cmo : string.cmi pervasives.cmi nativeint.cmi int64.cmi int32.cmi \
digest.cmi char.cmi array.cmi random.cmi
random.p.cmx : string.p.cmx pervasives.p.cmx nativeint.p.cmx int64.p.cmx int32.p.cmx \
digest.p.cmx char.p.cmx array.p.cmx random.cmi
-scanf.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \
- hashtbl.cmi buffer.cmi array.cmi scanf.cmi
-scanf.p.cmx : string.p.cmx printf.p.cmx pervasives.p.cmx obj.p.cmx list.p.cmx \
- hashtbl.p.cmx buffer.p.cmx array.p.cmx scanf.cmi
+scanf.cmo : string.cmi printf.cmi pervasives.cmi list.cmi \
+ camlinternalFormatBasics.cmi camlinternalFormat.cmi bytes.cmi buffer.cmi \
+ scanf.cmi
+scanf.p.cmx : string.p.cmx printf.p.cmx pervasives.p.cmx list.p.cmx \
+ camlinternalFormatBasics.p.cmx camlinternalFormat.p.cmx bytes.p.cmx buffer.p.cmx \
+ scanf.cmi
set.cmo : list.cmi set.cmi
set.p.cmx : list.p.cmx set.cmi
sort.cmo : array.cmi sort.cmi
sort.p.cmx : array.p.cmx sort.cmi
stack.cmo : list.cmi stack.cmi
stack.p.cmx : list.p.cmx stack.cmi
-stdLabels.cmo : stringLabels.cmi listLabels.cmi arrayLabels.cmi \
- stdLabels.cmi
-stdLabels.p.cmx : stringLabels.p.cmx listLabels.p.cmx arrayLabels.p.cmx \
- stdLabels.cmi
+stdLabels.cmo : stringLabels.cmi listLabels.cmi bytesLabels.cmi \
+ arrayLabels.cmi stdLabels.cmi
+stdLabels.p.cmx : stringLabels.p.cmx listLabels.p.cmx bytesLabels.p.cmx \
+ arrayLabels.p.cmx stdLabels.cmi
std_exit.cmo :
std_exit.p.cmx :
-stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi stream.cmi
-stream.p.cmx : string.p.cmx obj.p.cmx list.p.cmx lazy.p.cmx stream.cmi
-string.cmo : pervasives.cmi list.cmi char.cmi string.cmi
-string.p.cmx : pervasives.p.cmx list.p.cmx char.p.cmx string.cmi
+stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi bytes.cmi stream.cmi
+stream.p.cmx : string.p.cmx obj.p.cmx list.p.cmx lazy.p.cmx bytes.p.cmx stream.cmi
+string.cmo : pervasives.cmi list.cmi bytes.cmi string.cmi
+string.p.cmx : pervasives.p.cmx list.p.cmx bytes.p.cmx string.cmi
stringLabels.cmo : string.cmi stringLabels.cmi
stringLabels.p.cmx : string.p.cmx stringLabels.cmi
sys.cmo : sys.cmi
diff --git a/stdlib/Compflags b/stdlib/Compflags
index d0938af89..f393c4ec0 100755
--- a/stdlib/Compflags
+++ b/stdlib/Compflags
@@ -18,8 +18,11 @@ case $1 in
camlinternalOO.cmx|camlinternalOO.p.cmx) echo ' -inline 0';;
buffer.cmx|buffer.p.cmx) echo ' -inline 3';;
# make sure add_char is inlined (PR#5872)
- buffer.cm[io]|printf.cm[io]|format.cm[io]|scanf.cm[io]) echo ' -w A';;
+ buffer.cm[io]) echo ' -w A';;
+ camlinternalFormat.cm[io]) echo ' -w Ae';;
+ camlinternalFormatBasics*.cm[iox]) echo ' -nopervasives';;
+ printf.cm[io]|format.cm[io]|scanf.cm[io]) echo ' -w Ae';;
scanf.cmx|scanf.p.cmx) echo ' -inline 9';;
- *Labels.cm[ox]|*Labels.p.cmx) echo ' -nolabels -trans-mod';;
+ *Labels.cm[ox]|*Labels.p.cmx) echo ' -nolabels -no-alias-deps';;
*) echo ' ';;
esac
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 80be16e0b..37f9a5f0b 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -25,19 +25,22 @@ allopt-prof: stdlib.p.cmxa std_exit.p.cmx
installopt: installopt-default installopt-$(PROFILING)
installopt-default:
- cp stdlib.cmxa stdlib.a std_exit.o *.cmx $(LIBDIR)
- cd $(LIBDIR); $(RANLIB) stdlib.a
+ cp stdlib.cmxa stdlib.a std_exit.o *.cmx $(INSTALL_LIBDIR)
+ cd $(INSTALL_LIBDIR); $(RANLIB) stdlib.a
installopt-noprof:
- rm -f $(LIBDIR)/stdlib.p.cmxa; ln -s stdlib.cmxa $(LIBDIR)/stdlib.p.cmxa
- rm -f $(LIBDIR)/stdlib.p.a; ln -s stdlib.a $(LIBDIR)/stdlib.p.a
- rm -f $(LIBDIR)/std_exit.p.cmx; \
- ln -s std_exit.cmx $(LIBDIR)/std_exit.p.cmx
- rm -f $(LIBDIR)/std_exit.p.o; ln -s std_exit.o $(LIBDIR)/std_exit.p.o
+ rm -f $(INSTALL_LIBDIR)/stdlib.p.cmxa; \
+ ln -s stdlib.cmxa $(INSTALL_LIBDIR)/stdlib.p.cmxa
+ rm -f $(INSTALL_LIBDIR)/stdlib.p.a; \
+ ln -s stdlib.a $(INSTALL_LIBDIR)/stdlib.p.a
+ rm -f $(INSTALL_LIBDIR)/std_exit.p.cmx; \
+ ln -s std_exit.cmx $(INSTALL_LIBDIR)/std_exit.p.cmx
+ rm -f $(INSTALL_LIBDIR)/std_exit.p.o; \
+ ln -s std_exit.o $(INSTALL_LIBDIR)/std_exit.p.o
installopt-prof:
- cp stdlib.p.cmxa stdlib.p.a std_exit.p.cmx std_exit.p.o $(LIBDIR)
- cd $(LIBDIR); $(RANLIB) stdlib.p.a
+ cp stdlib.p.cmxa stdlib.p.a std_exit.p.cmx std_exit.p.o $(INSTALL_LIBDIR)
+ cd $(INSTALL_LIBDIR); $(RANLIB) stdlib.p.a
stdlib.p.cmxa: $(OBJS:.cmo=.p.cmx)
$(CAMLOPT) -a -o stdlib.p.cmxa $(OBJS:.cmo=.p.cmx)
diff --git a/stdlib/Makefile.nt b/stdlib/Makefile.nt
index b85622b90..590701bf9 100644
--- a/stdlib/Makefile.nt
+++ b/stdlib/Makefile.nt
@@ -16,7 +16,7 @@ include Makefile.shared
allopt: stdlib.cmxa std_exit.cmx
installopt:
- cp stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx $(LIBDIR)
+ cp stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx $(INSTALL_LIBDIR)
camlheader camlheader_ur: headernt.c ../config/Makefile
$(BYTECC) $(BYTECCCOMPOPTS) -c -I../byterun \
diff --git a/stdlib/Makefile.shared b/stdlib/Makefile.shared
index 80c40d600..54de337cb 100755
--- a/stdlib/Makefile.shared
+++ b/stdlib/Makefile.shared
@@ -15,38 +15,41 @@ include ../config/Makefile
RUNTIME=../boot/ocamlrun
COMPILER=../ocamlc
CAMLC=$(RUNTIME) $(COMPILER)
-COMPFLAGS=-strict-sequence -w +33..39 -g -warn-error A -bin-annot -nostdlib
+COMPFLAGS=-strict-sequence -w +33..39 -g -warn-error A -bin-annot -nostdlib \
+ -safe-string
OPTCOMPILER=../ocamlopt
CAMLOPT=$(RUNTIME) $(OPTCOMPILER)
-OPTCOMPFLAGS=-warn-error A -nostdlib -g -bin-annot
CAMLDEP=../boot/ocamlrun ../tools/ocamldep
-OBJS=pervasives.cmo $(OTHERS)
-OTHERS=array.cmo list.cmo char.cmo string.cmo sys.cmo \
+OBJS=camlinternalFormatBasics.cmo pervasives.cmo $(OTHERS)
+OTHERS=array.cmo list.cmo char.cmo bytes.cmo string.cmo sys.cmo \
sort.cmo marshal.cmo obj.cmo \
int32.cmo int64.cmo nativeint.cmo \
lexing.cmo parsing.cmo \
set.cmo map.cmo stack.cmo queue.cmo \
camlinternalLazy.cmo lazy.cmo stream.cmo \
- buffer.cmo printf.cmo \
+ buffer.cmo camlinternalFormat.cmo printf.cmo \
arg.cmo printexc.cmo gc.cmo \
digest.cmo random.cmo hashtbl.cmo format.cmo scanf.cmo callback.cmo \
camlinternalOO.cmo oo.cmo camlinternalMod.cmo \
genlex.cmo weak.cmo \
filename.cmo complex.cmo \
- arrayLabels.cmo listLabels.cmo stringLabels.cmo moreLabels.cmo stdLabels.cmo
+ arrayLabels.cmo listLabels.cmo bytesLabels.cmo \
+ stringLabels.cmo moreLabels.cmo stdLabels.cmo
all: stdlib.cma std_exit.cmo camlheader camlheader_ur
+INSTALL_LIBDIR=$(DESTDIR)$(LIBDIR)
+
install: install-$(RUNTIMED)
cp stdlib.cma std_exit.cmo *.cmi *.mli *.ml camlheader camlheader_ur \
- $(LIBDIR)
+ $(INSTALL_LIBDIR)
install-noruntimed:
.PHONY: install-noruntimed
install-runtimed: camlheaderd
- cp camlheaderd $(LIBDIR)
+ cp camlheaderd $(INSTALL_LIBDIR)
.PHONY: install-runtimed
stdlib.cma: $(OBJS)
@@ -73,10 +76,10 @@ clean::
$(CAMLC) $(COMPFLAGS) `./Compflags $@` -c $<
.ml.cmx:
- $(CAMLOPT) $(OPTCOMPFLAGS) `./Compflags $@` -c $<
+ $(CAMLOPT) $(COMPFLAGS) `./Compflags $@` -c $<
.ml.p.cmx:
- $(CAMLOPT) $(OPTCOMPFLAGS) `./Compflags $@` -p -c -o $*.p.cmx $<
+ $(CAMLOPT) $(COMPFLAGS) `./Compflags $@` -p -c -o $*.p.cmx $<
# Dependencies on the compiler
$(OBJS) std_exit.cmo: $(COMPILER)
@@ -85,7 +88,7 @@ $(OBJS:.cmo=.cmx) std_exit.cmx: $(OPTCOMPILER)
$(OBJS:.cmo=.p.cmx) std_exit.p.cmx: $(OPTCOMPILER)
# Dependencies on Pervasives (not tracked by ocamldep)
-$(OBJS) std_exit.cmo: pervasives.cmi
+$(OTHERS) std_exit.cmo: pervasives.cmi
$(OTHERS:.cmo=.cmi) std_exit.cmi: pervasives.cmi
$(OBJS:.cmo=.cmx) std_exit.cmx: pervasives.cmi
$(OBJS:.cmo=.p.cmx) std_exit.p.cmx: pervasives.cmi
diff --git a/stdlib/StdlibModules b/stdlib/StdlibModules
index c5c8896ed..abdfcb362 100644
--- a/stdlib/StdlibModules
+++ b/stdlib/StdlibModules
@@ -21,7 +21,11 @@ STDLIB_MODULES=\
array \
arrayLabels \
buffer \
+ bytes \
+ bytesLabels \
callback \
+ camlinternalFormat \
+ camlinternalFormatBasics \
camlinternalLazy \
camlinternalMod \
camlinternalOO \
diff --git a/stdlib/arg.ml b/stdlib/arg.ml
index 8b64236a7..0f6480b82 100644
--- a/stdlib/arg.ml
+++ b/stdlib/arg.ml
@@ -112,11 +112,11 @@ let parse_argv_dynamic ?(current=current) argv speclist anonfun errmsg =
| Unknown "-help" -> ()
| Unknown "--help" -> ()
| Unknown s ->
- bprintf b "%s: unknown option `%s'.\n" progname s
+ bprintf b "%s: unknown option '%s'.\n" progname s
| Missing s ->
- bprintf b "%s: option `%s' needs an argument.\n" progname s
+ bprintf b "%s: option '%s' needs an argument.\n" progname s
| Wrong (opt, arg, expected) ->
- bprintf b "%s: wrong argument `%s'; option `%s' expects %s.\n"
+ bprintf b "%s: wrong argument '%s'; option '%s' expects %s.\n"
progname arg opt expected
| Message s ->
bprintf b "%s: %s.\n" progname s
@@ -129,7 +129,7 @@ let parse_argv_dynamic ?(current=current) argv speclist anonfun errmsg =
incr current;
while !current < l do
let s = argv.(!current) in
- if String.length s >= 1 && String.get s 0 = '-' then begin
+ if String.length s >= 1 && s.[0] = '-' then begin
let action =
try assoc3 s !speclist
with Not_found -> stop (Unknown s)
diff --git a/stdlib/arg.mli b/stdlib/arg.mli
index e6e07316d..22eda40b7 100644
--- a/stdlib/arg.mli
+++ b/stdlib/arg.mli
@@ -94,7 +94,7 @@ val parse :
*)
val parse_dynamic :
- (key * spec * doc) list ref -> anon_fun -> string -> unit
+ (key * spec * doc) list ref -> anon_fun -> usage_msg -> unit
(** Same as {!Arg.parse}, except that the [speclist] argument is a reference
and may be updated during the parsing. A typical use for this feature
is to parse command lines of the form:
diff --git a/stdlib/array.mli b/stdlib/array.mli
index 7c0049e28..e9a64528f 100644
--- a/stdlib/array.mli
+++ b/stdlib/array.mli
@@ -47,6 +47,7 @@ external make : int -> 'a -> 'a array = "caml_make_vect"
size is only [Sys.max_array_length / 2].*)
external create : int -> 'a -> 'a array = "caml_make_vect"
+ [@@ocaml.deprecated]
(** @deprecated [Array.create] is an alias for {!Array.make}. *)
val init : int -> (int -> 'a) -> 'a array
@@ -73,6 +74,7 @@ val make_matrix : int -> int -> 'a -> 'a array array
size is only [Sys.max_array_length / 2]. *)
val create_matrix : int -> int -> 'a -> 'a array array
+ [@@ocaml.deprecated]
(** @deprecated [Array.create_matrix] is an alias for {!Array.make_matrix}. *)
val append : 'a array -> 'a array -> 'a array
diff --git a/stdlib/arrayLabels.mli b/stdlib/arrayLabels.mli
index 03b6224ae..cf8b650e5 100644
--- a/stdlib/arrayLabels.mli
+++ b/stdlib/arrayLabels.mli
@@ -47,7 +47,8 @@ external make : int -> 'a -> 'a array = "caml_make_vect"
size is only [Sys.max_array_length / 2].*)
external create : int -> 'a -> 'a array = "caml_make_vect"
-(** @deprecated [Array.create] is an alias for {!ArrayLabels.make}. *)
+ [@@ocaml.deprecated]
+(** @deprecated [ArrayLabels.create] is an alias for {!ArrayLabels.make}. *)
val init : int -> f:(int -> 'a) -> 'a array
(** [Array.init n f] returns a fresh array of length [n],
@@ -73,7 +74,8 @@ val make_matrix : dimx:int -> dimy:int -> 'a -> 'a array array
size is only [Sys.max_array_length / 2]. *)
val create_matrix : dimx:int -> dimy:int -> 'a -> 'a array array
-(** @deprecated [Array.create_matrix] is an alias for
+ [@@ocaml.deprecated]
+(** @deprecated [ArrayLabels.create_matrix] is an alias for
{!ArrayLabels.make_matrix}. *)
val append : 'a array -> 'a array -> 'a array
diff --git a/stdlib/buffer.ml b/stdlib/buffer.ml
index 78a9e2611..986fe6f33 100644
--- a/stdlib/buffer.ml
+++ b/stdlib/buffer.ml
@@ -14,41 +14,38 @@
(* Extensible buffers *)
type t =
- {mutable buffer : string;
+ {mutable buffer : bytes;
mutable position : int;
mutable length : int;
- initial_buffer : string}
+ initial_buffer : bytes}
let create n =
let n = if n < 1 then 1 else n in
let n = if n > Sys.max_string_length then Sys.max_string_length else n in
- let s = String.create n in
+ let s = Bytes.create n in
{buffer = s; position = 0; length = n; initial_buffer = s}
-let contents b = String.sub b.buffer 0 b.position
+let contents b = Bytes.sub_string b.buffer 0 b.position
+let to_bytes b = Bytes.sub b.buffer 0 b.position
let sub b ofs len =
if ofs < 0 || len < 0 || ofs > b.position - len
then invalid_arg "Buffer.sub"
- else begin
- let r = String.create len in
- String.unsafe_blit b.buffer ofs r 0 len;
- r
- end
+ else Bytes.sub_string b.buffer ofs len
;;
let blit src srcoff dst dstoff len =
if len < 0 || srcoff < 0 || srcoff > src.position - len
- || dstoff < 0 || dstoff > (String.length dst) - len
+ || dstoff < 0 || dstoff > (Bytes.length dst) - len
then invalid_arg "Buffer.blit"
else
- String.blit src.buffer srcoff dst dstoff len
+ Bytes.unsafe_blit src.buffer srcoff dst dstoff len
;;
let nth b ofs =
if ofs < 0 || ofs >= b.position then
invalid_arg "Buffer.nth"
- else String.unsafe_get b.buffer ofs
+ else Bytes.unsafe_get b.buffer ofs
;;
let length b = b.position
@@ -57,7 +54,7 @@ let clear b = b.position <- 0
let reset b =
b.position <- 0; b.buffer <- b.initial_buffer;
- b.length <- String.length b.buffer
+ b.length <- Bytes.length b.buffer
let resize b more =
let len = b.length in
@@ -68,34 +65,41 @@ let resize b more =
then new_len := Sys.max_string_length
else failwith "Buffer.add: cannot grow buffer"
end;
- let new_buffer = String.create !new_len in
- String.blit b.buffer 0 new_buffer 0 b.position;
+ let new_buffer = Bytes.create !new_len in
+ (* PR#6148: let's keep using [blit] rather than [unsafe_blit] in
+ this tricky function that is slow anyway. *)
+ Bytes.blit b.buffer 0 new_buffer 0 b.position;
b.buffer <- new_buffer;
b.length <- !new_len
let add_char b c =
let pos = b.position in
if pos >= b.length then resize b 1;
- String.unsafe_set b.buffer pos c;
+ Bytes.unsafe_set b.buffer pos c;
b.position <- pos + 1
let add_substring b s offset len =
- if offset < 0 || len < 0 || offset > String.length s - len
- then invalid_arg "Buffer.add_substring";
+ if offset < 0 || len < 0 || offset + len > String.length s
+ then invalid_arg "Buffer.add_substring/add_subbytes";
let new_position = b.position + len in
if new_position > b.length then resize b len;
- String.unsafe_blit s offset b.buffer b.position len;
+ Bytes.blit_string s offset b.buffer b.position len;
b.position <- new_position
+let add_subbytes b s offset len =
+ add_substring b (Bytes.unsafe_to_string s) offset len
+
let add_string b s =
let len = String.length s in
let new_position = b.position + len in
if new_position > b.length then resize b len;
- String.unsafe_blit s 0 b.buffer b.position len;
+ Bytes.blit_string s 0 b.buffer b.position len;
b.position <- new_position
+let add_bytes b s = add_string b (Bytes.unsafe_to_string s)
+
let add_buffer b bs =
- add_substring b bs.buffer 0 bs.position
+ add_subbytes b bs.buffer 0 bs.position
let add_channel b ic len =
if len < 0 || len > Sys.max_string_length then (* PR#5004 *)
diff --git a/stdlib/buffer.mli b/stdlib/buffer.mli
index c50c98792..e7ce8b999 100644
--- a/stdlib/buffer.mli
+++ b/stdlib/buffer.mli
@@ -11,9 +11,9 @@
(* *)
(***********************************************************************)
-(** Extensible string buffers.
+(** Extensible buffers.
- This module implements string buffers that automatically expand
+ This module implements buffers that automatically expand
as necessary. It provides accumulative concatenation of strings
in quasi-linear time (instead of quadratic time when strings are
concatenated pairwise).
@@ -24,8 +24,8 @@ type t
val create : int -> t
(** [create n] returns a fresh buffer, initially empty.
- The [n] parameter is the initial size of the internal string
- that holds the buffer contents. That string is automatically
+ The [n] parameter is the initial size of the internal byte sequence
+ that holds the buffer contents. That byte sequence is automatically
reallocated when more than [n] characters are stored in the buffer,
but shrinks back to [n] characters when [reset] is called.
For best performance, [n] should be of the same order of magnitude
@@ -40,26 +40,30 @@ val contents : t -> string
(** Return a copy of the current contents of the buffer.
The buffer itself is unchanged. *)
+val to_bytes : t -> bytes
+(** Return a copy of the current contents of the buffer.
+ The buffer itself is unchanged. *)
+
val sub : t -> int -> int -> string
-(** [Buffer.sub b off len] returns (a copy of) the substring of the
-current contents of the buffer [b] starting at offset [off] of length
-[len] bytes. May raise [Invalid_argument] if out of bounds request. The
-buffer itself is unaffected. *)
+(** [Buffer.sub b off len] returns (a copy of) the bytes from the
+ current contents of the buffer [b] starting at offset [off] of
+ length [len] bytes. May raise [Invalid_argument] if out of bounds
+ request. The buffer itself is unaffected. *)
-val blit : t -> int -> string -> int -> int -> unit
+val blit : t -> int -> bytes -> int -> int -> unit
(** [Buffer.blit src srcoff dst dstoff len] copies [len] characters from
the current contents of the buffer [src], starting at offset [srcoff]
- to string [dst], starting at character [dstoff].
+ to [dst], starting at character [dstoff].
Raise [Invalid_argument] if [srcoff] and [len] do not designate a valid
- substring of [src], or if [dstoff] and [len] do not designate a valid
- substring of [dst].
+ range of [src], or if [dstoff] and [len] do not designate a valid
+ range of [dst].
@since 3.11.2
*)
val nth : t -> int -> char
(** get the n-th character of the buffer. Raise [Invalid_argument] if
-index out of bounds *)
+ index out of bounds *)
val length : t -> int
(** Return the number of characters currently contained in the buffer. *)
@@ -68,8 +72,8 @@ val clear : t -> unit
(** Empty the buffer. *)
val reset : t -> unit
-(** Empty the buffer and deallocate the internal string holding the
- buffer contents, replacing it with the initial internal string
+(** Empty the buffer and deallocate the internal byte sequence holding the
+ buffer contents, replacing it with the initial internal byte sequence
of length [n] that was allocated by {!Buffer.create} [n].
For long-lived buffers that may have grown a lot, [reset] allows
faster reclamation of the space used by the buffer. *)
@@ -80,10 +84,17 @@ val add_char : t -> char -> unit
val add_string : t -> string -> unit
(** [add_string b s] appends the string [s] at the end of the buffer [b]. *)
+val add_bytes : t -> bytes -> unit
+(** [add_string b s] appends the string [s] at the end of the buffer [b]. *)
+
val add_substring : t -> string -> int -> int -> unit
(** [add_substring b s ofs len] takes [len] characters from offset
[ofs] in string [s] and appends them at the end of the buffer [b]. *)
+val add_subbytes : t -> bytes -> int -> int -> unit
+(** [add_substring b s ofs len] takes [len] characters from offset
+ [ofs] in byte sequence [s] and appends them at the end of the buffer [b]. *)
+
val add_substitute : t -> (string -> string) -> string -> unit
(** [add_substitute b f s] appends the string pattern [s] at the end
of the buffer [b] with substitution.
diff --git a/stdlib/bytes.ml b/stdlib/bytes.ml
new file mode 100644
index 000000000..ece7c1ea5
--- /dev/null
+++ b/stdlib/bytes.ml
@@ -0,0 +1,253 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(* Byte sequence operations *)
+
+external length : bytes -> int = "%string_length"
+external string_length : string -> int = "%string_length"
+external get : bytes -> int -> char = "%string_safe_get"
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+external create : int -> bytes = "caml_create_string"
+external unsafe_get : bytes -> int -> char = "%string_unsafe_get"
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_fill : bytes -> int -> int -> char -> unit
+ = "caml_fill_string" "noalloc"
+external unsafe_to_string : bytes -> string = "%identity"
+external unsafe_of_string : string -> bytes = "%identity"
+
+external unsafe_blit : bytes -> int -> bytes -> int -> int -> unit
+ = "caml_blit_string" "noalloc"
+external unsafe_blit_string : string -> int -> bytes -> int -> int -> unit
+ = "caml_blit_string" "noalloc"
+
+let make n c =
+ let s = create n in
+ unsafe_fill s 0 n c;
+ s
+
+let init n f =
+ let s = create n in
+ for i = 0 to n - 1 do
+ unsafe_set s i (f i)
+ done;
+ s
+
+let empty = create 0;;
+
+let copy s =
+ let len = length s in
+ let r = create len in
+ unsafe_blit s 0 r 0 len;
+ r
+
+let to_string b = unsafe_to_string (copy b)
+let of_string s = copy (unsafe_of_string s)
+
+let sub s ofs len =
+ if ofs < 0 || len < 0 || ofs > length s - len
+ then invalid_arg "Bytes.sub"
+ else begin
+ let r = create len in
+ unsafe_blit s ofs r 0 len;
+ r
+ end
+
+let sub_string b ofs len = unsafe_to_string (sub b ofs len)
+
+let extend s left right =
+ let len = length s + left + right in
+ let r = create len in
+ let (srcoff, dstoff) = if left < 0 then -left, 0 else 0, left in
+ let cpylen = min (length s - srcoff) (len - dstoff) in
+ if cpylen > 0 then unsafe_blit s srcoff r dstoff cpylen;
+ r
+
+let fill s ofs len c =
+ if ofs < 0 || len < 0 || ofs > length s - len
+ then invalid_arg "Bytes.fill"
+ else unsafe_fill s ofs len c
+
+let blit s1 ofs1 s2 ofs2 len =
+ if len < 0 || ofs1 < 0 || ofs1 > length s1 - len
+ || ofs2 < 0 || ofs2 > length s2 - len
+ then invalid_arg "Bytes.blit"
+ else unsafe_blit s1 ofs1 s2 ofs2 len
+
+let blit_string s1 ofs1 s2 ofs2 len =
+ if len < 0 || ofs1 < 0 || ofs1 > string_length s1 - len
+ || ofs2 < 0 || ofs2 > length s2 - len
+ then invalid_arg "Bytes.blit_string"
+ else unsafe_blit_string s1 ofs1 s2 ofs2 len
+
+let iter f a =
+ for i = 0 to length a - 1 do f(unsafe_get a i) done
+
+let iteri f a =
+ for i = 0 to length a - 1 do f i (unsafe_get a i) done
+
+let concat sep l =
+ match l with
+ [] -> empty
+ | hd :: tl ->
+ let num = ref 0 and len = ref 0 in
+ List.iter (fun s -> incr num; len := !len + length s) l;
+ let r = create (!len + length sep * (!num - 1)) in
+ unsafe_blit hd 0 r 0 (length hd);
+ let pos = ref(length hd) in
+ List.iter
+ (fun s ->
+ unsafe_blit sep 0 r !pos (length sep);
+ pos := !pos + length sep;
+ unsafe_blit s 0 r !pos (length s);
+ pos := !pos + length s)
+ tl;
+ r
+
+let cat s1 s2 =
+ let l1 = length s1 in
+ let l2 = length s2 in
+ let r = create (l1 + l2) in
+ unsafe_blit s1 0 r 0 l1;
+ unsafe_blit s2 0 r l1 l2;
+ r
+;;
+
+external is_printable: char -> bool = "caml_is_printable"
+external char_code: char -> int = "%identity"
+external char_chr: int -> char = "%identity"
+
+let is_space = function
+ | ' ' | '\012' | '\n' | '\r' | '\t' -> true
+ | _ -> false
+
+let trim s =
+ let len = length s in
+ let i = ref 0 in
+ while !i < len && is_space (unsafe_get s !i) do
+ incr i
+ done;
+ let j = ref (len - 1) in
+ while !j >= !i && is_space (unsafe_get s !j) do
+ decr j
+ done;
+ if !j >= !i then
+ sub s !i (!j - !i + 1)
+ else
+ empty
+
+let escaped s =
+ let n = ref 0 in
+ for i = 0 to length s - 1 do
+ n := !n +
+ (match unsafe_get s i with
+ | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
+ | c -> if is_printable c then 1 else 4)
+ done;
+ if !n = length s then copy s else begin
+ let s' = create !n in
+ n := 0;
+ for i = 0 to length s - 1 do
+ begin match unsafe_get s i with
+ | ('"' | '\\') as c ->
+ unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c
+ | '\n' ->
+ unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n'
+ | '\t' ->
+ unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't'
+ | '\r' ->
+ unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'r'
+ | '\b' ->
+ unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'b'
+ | c ->
+ if is_printable c then
+ unsafe_set s' !n c
+ else begin
+ let a = char_code c in
+ unsafe_set s' !n '\\';
+ incr n;
+ unsafe_set s' !n (char_chr (48 + a / 100));
+ incr n;
+ unsafe_set s' !n (char_chr (48 + (a / 10) mod 10));
+ incr n;
+ unsafe_set s' !n (char_chr (48 + a mod 10))
+ end
+ end;
+ incr n
+ done;
+ s'
+ end
+
+let map f s =
+ let l = length s in
+ if l = 0 then s else begin
+ let r = create l in
+ for i = 0 to l - 1 do unsafe_set r i (f (unsafe_get s i)) done;
+ r
+ end
+
+let mapi f s =
+ let l = length s in
+ if l = 0 then s else begin
+ let r = create l in
+ for i = 0 to l - 1 do unsafe_set r i (f i (unsafe_get s i)) done;
+ r
+ end
+
+let uppercase s = map Char.uppercase s
+let lowercase s = map Char.lowercase s
+
+let apply1 f s =
+ if length s = 0 then s else begin
+ let r = copy s in
+ unsafe_set r 0 (f(unsafe_get s 0));
+ r
+ end
+
+let capitalize s = apply1 Char.uppercase s
+let uncapitalize s = apply1 Char.lowercase s
+
+let rec index_rec s lim i c =
+ if i >= lim then raise Not_found else
+ if unsafe_get s i = c then i else index_rec s lim (i + 1) c;;
+
+let index s c = index_rec s (length s) 0 c;;
+
+let index_from s i c =
+ let l = length s in
+ if i < 0 || i > l then invalid_arg "Bytes.index_from" else
+ index_rec s l i c;;
+
+let rec rindex_rec s i c =
+ if i < 0 then raise Not_found else
+ if unsafe_get s i = c then i else rindex_rec s (i - 1) c;;
+
+let rindex s c = rindex_rec s (length s - 1) c;;
+
+let rindex_from s i c =
+ if i < -1 || i >= length s then invalid_arg "Bytes.rindex_from" else
+ rindex_rec s i c;;
+
+let contains_from s i c =
+ let l = length s in
+ if i < 0 || i > l then invalid_arg "Bytes.contains_from" else
+ try ignore (index_rec s l i c); true with Not_found -> false;;
+
+let contains s c = contains_from s 0 c;;
+
+let rcontains_from s i c =
+ if i < 0 || i >= length s then invalid_arg "Bytes.rcontains_from" else
+ try ignore (rindex_rec s i c); true with Not_found -> false;;
+
+type t = bytes
+
+let compare (x: t) (y: t) = Pervasives.compare x y
diff --git a/stdlib/bytes.mli b/stdlib/bytes.mli
new file mode 100644
index 000000000..82b28a28c
--- /dev/null
+++ b/stdlib/bytes.mli
@@ -0,0 +1,398 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(** Byte sequence operations.
+
+ A byte sequence is a mutable data structure that contains a
+ fixed-length sequence of bytes. Each byte can be indexed in
+ constant time for reading or writing.
+
+ Given a byte sequence [s] of length [l], we can access each of the
+ [l] bytes of [s] via its index in the sequence. Indexes start at
+ [0], and we will call an index valid in [s] if it falls within the
+ range [[0...l-1]] (inclusive). A position is the point between two
+ bytes or at the beginning or end of the sequence. We call a
+ position valid in [s] if it falls within the range [[0...l]]
+ (inclusive). Note that the byte at index [n] is between positions
+ [n] and [n+1].
+
+ Two parameters [start] and [len] are said to designate a valid
+ range of [s] if [len >= 0] and [start] and [start+len] are valid
+ positions in [s].
+
+ Byte sequences can be modified in place, for instance via the [set]
+ and [blit] functions described below. See also strings (module
+ {!String}), which are almost the same data structure, but cannot be
+ modified in place.
+
+ Bytes are represented by the OCaml type [char].
+
+ @since 4.02.0
+ *)
+
+external length : bytes -> int = "%string_length"
+(** Return the length (number of bytes) of the argument. *)
+
+external get : bytes -> int -> char = "%string_safe_get"
+(** [get s n] returns the byte at index [n] in argument [s].
+
+ Raise [Invalid_argument] if [n] not a valid index in [s]. *)
+
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+(** [set s n c] modifies [s] in place, replacing the byte at index [n]
+ with [c].
+
+ Raise [Invalid_argument] if [n] is not a valid index in [s]. *)
+
+external create : int -> bytes = "caml_create_string"
+(** [create n] returns a new byte sequence of length [n]. The
+ sequence is uninitialized and contains arbitrary bytes.
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val make : int -> char -> bytes
+(** [make n c] returns a new byte sequence of length [n], filled with
+ the byte [c].
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val init : int -> (int -> char) -> bytes
+(** [Bytes.init n f] returns a fresh byte sequence of length [n], with
+ character [i] initialized to the result of [f i] (in increasing
+ index order).
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val empty : bytes
+(** A byte sequence of size 0. *)
+
+val copy : bytes -> bytes
+(** Return a new byte sequence that contains the same bytes as the
+ argument. *)
+
+val of_string : string -> bytes
+(** Return a new byte sequence that contains the same bytes as the
+ given string. *)
+
+val to_string : bytes -> string
+(** Return a new string that contains the same bytes as the given byte
+ sequence. *)
+
+val sub : bytes -> int -> int -> bytes
+(** [sub s start len] returns a new byte sequence of length [len],
+ containing the subsequence of [s] that starts at position [start]
+ and has length [len].
+
+ Raise [Invalid_argument] if [start] and [len] do not designate a
+ valid range of [s]. *)
+
+val sub_string : bytes -> int -> int -> string
+(** Same as [sub] but return a string instead of a byte sequence. *)
+
+val extend : bytes -> int -> int -> bytes
+(** [extend s left right] returns a new byte sequence that contains
+ the bytes of [s], with [left] uninitialized bytes prepended and
+ [right] uninitialized bytes appended to it. If [left] or [right]
+ is negative, then bytes are removed (instead of appended) from
+ the corresponding side of [s].
+
+ Raise [Invalid_argument] if the result length is negative or
+ longer than {!Sys.max_string_length} bytes. *)
+
+val fill : bytes -> int -> int -> char -> unit
+(** [fill s start len c] modifies [s] in place, replacing [len]
+ characters with [c], starting at [start].
+
+ Raise [Invalid_argument] if [start] and [len] do not designate a
+ valid range of [s]. *)
+
+val blit : bytes -> int -> bytes -> int -> int -> unit
+(** [blit src srcoff dst dstoff len] copies [len] bytes from sequence
+ [src], starting at index [srcoff], to sequence [dst], starting at
+ index [dstoff]. It works correctly even if [src] and [dst] are the
+ same byte sequence, and the source and destination intervals
+ overlap.
+
+ Raise [Invalid_argument] if [srcoff] and [len] do not
+ designate a valid range of [src], or if [dstoff] and [len]
+ do not designate a valid range of [dst]. *)
+
+val blit_string : string -> int -> bytes -> int -> int -> unit
+(** [blit src srcoff dst dstoff len] copies [len] bytes from string
+ [src], starting at index [srcoff], to byte sequence [dst],
+ starting at index [dstoff].
+
+ Raise [Invalid_argument] if [srcoff] and [len] do not
+ designate a valid range of [src], or if [dstoff] and [len]
+ do not designate a valid range of [dst]. *)
+
+val concat : bytes -> bytes list -> bytes
+(** [concat sep sl] concatenates the list of byte sequences [sl],
+ inserting the separator byte sequence [sep] between each, and
+ returns the result as a new byte sequence.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
+
+val cat : bytes -> bytes -> bytes
+(** [cat s1 s2] concatenates [s1] and [s2] and returns the result
+ as new byte sequence.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
+
+val iter : (char -> unit) -> bytes -> unit
+(** [iter f s] applies function [f] in turn to all the bytes of [s].
+ It is equivalent to [f (get s 0); f (get s 1); ...; f (get s
+ (length s - 1)); ()]. *)
+
+val iteri : (int -> char -> unit) -> bytes -> unit
+(** Same as {!Bytes.iter}, but the function is applied to the index of
+ the byte as first argument and the byte itself as second
+ argument. *)
+
+val map : (char -> char) -> bytes -> bytes
+(** [map f s] applies function [f] in turn to all the bytes of [s]
+ (in increasing index order) and stores the resulting bytes in
+ a new sequence that is returned as the result. *)
+
+val mapi : (int -> char -> char) -> bytes -> bytes
+(** [mapi f s] calls [f] with each character of [s] and its
+ index (in increasing index order) and stores the resulting bytes
+ in a new sequence that is returned as the result. *)
+
+val trim : bytes -> bytes
+(** Return a copy of the argument, without leading and trailing
+ whitespace. The bytes regarded as whitespace are the ASCII
+ characters [' '], ['\012'], ['\n'], ['\r'], and ['\t']. *)
+
+val escaped : bytes -> bytes
+(** Return a copy of the argument, with special characters represented
+ by escape sequences, following the lexical conventions of OCaml.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
+
+val index : bytes -> char -> int
+(** [index s c] returns the index of the first occurrence of byte [c]
+ in [s].
+
+ Raise [Not_found] if [c] does not occur in [s]. *)
+
+val rindex : bytes -> char -> int
+(** [rindex s c] returns the index of the last occurrence of byte [c]
+ in [s].
+
+ Raise [Not_found] if [c] does not occur in [s]. *)
+
+val index_from : bytes -> int -> char -> int
+(** [index_from s i c] returns the index of the first occurrence of
+ byte [c] in [s] after position [i]. [Bytes.index s c] is
+ equivalent to [Bytes.index_from s 0 c].
+
+ Raise [Invalid_argument] if [i] is not a valid position in [s].
+ Raise [Not_found] if [c] does not occur in [s] after position [i]. *)
+
+val rindex_from : bytes -> int -> char -> int
+(** [rindex_from s i c] returns the index of the last occurrence of
+ byte [c] in [s] before position [i+1]. [rindex s c] is equivalent
+ to [rindex_from s (Bytes.length s - 1) c].
+
+ Raise [Invalid_argument] if [i+1] is not a valid position in [s].
+ Raise [Not_found] if [c] does not occur in [s] before position [i+1]. *)
+
+val contains : bytes -> char -> bool
+(** [contains s c] tests if byte [c] appears in [s]. *)
+
+val contains_from : bytes -> int -> char -> bool
+(** [contains_from s start c] tests if byte [c] appears in [s] after
+ position [start]. [contains s c] is equivalent to [contains_from
+ s 0 c].
+
+ Raise [Invalid_argument] if [start] is not a valid position in [s]. *)
+
+val rcontains_from : bytes -> int -> char -> bool
+(** [rcontains_from s stop c] tests if byte [c] appears in [s] before
+ position [stop+1].
+
+ Raise [Invalid_argument] if [stop < 0] or [stop+1] is not a valid
+ position in [s]. *)
+
+val uppercase : bytes -> bytes
+(** Return a copy of the argument, with all lowercase letters
+ translated to uppercase, including accented letters of the ISO
+ Latin-1 (8859-1) character set. *)
+
+val lowercase : bytes -> bytes
+(** Return a copy of the argument, with all uppercase letters
+ translated to lowercase, including accented letters of the ISO
+ Latin-1 (8859-1) character set. *)
+
+val capitalize : bytes -> bytes
+(** Return a copy of the argument, with the first byte set to
+ uppercase. *)
+
+val uncapitalize : bytes -> bytes
+(** Return a copy of the argument, with the first byte set to
+ lowercase. *)
+
+type t = bytes
+(** An alias for the type of byte sequences. *)
+
+val compare: t -> t -> int
+(** The comparison function for byte sequences, with the same
+ specification as {!Pervasives.compare}. Along with the type [t],
+ this function [compare] allows the module [Bytes] to be passed as
+ argument to the functors {!Set.Make} and {!Map.Make}. *)
+
+
+(** {4 Unsafe conversions (for advanced users)}
+
+ This section describes unsafe, low-level conversion functions
+ between [bytes] and [string]. They do not copy the internal data;
+ used improperly, they can break the immutability invariant on
+ strings provided by the [-safe-string] option. They are available for
+ expert library authors, but for most purposes you should use the
+ always-correct {!Bytes.to_string} and {!Bytes.of_string} instead.
+*)
+
+val unsafe_to_string : bytes -> string
+(** Unsafely convert a byte sequence into a string.
+
+ To reason about the use of [unsafe_to_string], it is convenient to
+ consider an "ownership" discipline. A piece of code that
+ manipulates some data "owns" it; there are several disjoint ownership
+ modes, including:
+ - Unique ownership: the data may be accessed and mutated
+ - Shared ownership: the data has several owners, that may only
+ access it, not mutate it.
+
+ Unique ownership is linear: passing the data to another piece of
+ code means giving up ownership (we cannot write the
+ data again). A unique owner may decide to make the data shared
+ (giving up mutation rights on it), but shared data may not become
+ uniquely-owned again.
+
+ [unsafe_to_string s] can only be used when the caller owns the byte
+ sequence [s] -- either uniquely or as shared immutable data. The
+ caller gives up ownership of [s], and gains ownership of the
+ returned string.
+
+ There are two valid use-cases that respect this ownership
+ discipline:
+
+ 1. Creating a string by initializing and mutating a byte sequence
+ that is never changed after initialization is performed.
+
+ {[
+let string_init len f : string =
+ let s = Bytes.create len in
+ for i = 0 to len - 1 do Bytes.set s i (f i) done;
+ Bytes.unsafe_to_string s
+ ]}
+
+ This function is safe because the byte sequence [s] will never be
+ accessed or mutated after [unsafe_to_string] is called. The
+ [string_init] code gives up ownership of [s], and returns the
+ ownership of the resulting string to its caller.
+
+ Note that it would be unsafe if [s] was passed as an additional
+ parameter to the function [f] as it could escape this way and be
+ mutated in the future -- [string_init] would give up ownership of
+ [s] to pass it to [f], and could not call [unsafe_to_string]
+ safely.
+
+ We have provided the {!String.init}, {!String.map} and
+ {!String.mapi} functions to cover most cases of building
+ new strings. You should prefer those over [to_string] or
+ [unsafe_to_string] whenever applicable.
+
+ 2. Temporarily giving ownership of a byte sequence to a function
+ that expects a uniquely owned string and returns ownership back, so
+ that we can mutate the sequence again after the call ended.
+
+ {[
+let bytes_length (s : bytes) =
+ String.length (Bytes.unsafe_to_string s)
+ ]}
+
+ In this use-case, we do not promise that [s] will never be mutated
+ after the call to [bytes_length s]. The {!String.length} function
+ temporarily borrows unique ownership of the byte sequence
+ (and sees it as a [string]), but returns this ownership back to
+ the caller, which may assume that [s] is still a valid byte
+ sequence after the call. Note that this is only correct because we
+ know that {!String.length} does not capture its argument -- it could
+ escape by a side-channel such as a memoization combinator.
+
+ The caller may not mutate [s] while the string is borrowed (it has
+ temporarily given up ownership). This affects concurrent programs,
+ but also higher-order functions: if [String.length] returned
+ a closure to be called later, [s] should not be mutated until this
+ closure is fully applied and returns ownership.
+*)
+
+val unsafe_of_string : string -> bytes
+(** Unsafely convert a shared string to a byte sequence that should
+ not be mutated.
+
+ The same ownership discipline that makes [unsafe_to_string]
+ correct applies to [unsafe_of_string]: you may use it if you were
+ the owner of the [string] value, and you will own the return
+ [bytes] in the same mode.
+
+ In practice, unique ownership of string values is extremely
+ difficult to reason about correctly. You should always assume
+ strings are shared, never uniquely owned.
+
+ For example, string literals are implicitly shared by the
+ compiler, so you never uniquely own them.
+
+ {[
+let incorrect = Bytes.unsafe_of_string "hello"
+let s = Bytes.of_string "hello"
+ ]}
+
+ The first declaration is incorrect, because the string literal
+ ["hello"] could be shared by the compiler with other parts of the
+ program, and mutating [incorrect] is a bug. You must always use
+ the second version, which performs a copy and is thus correct.
+
+ Assuming unique ownership of strings that are not string
+ literals, but are (partly) built from string literals, is also
+ incorrect. For example, mutating [unsafe_of_string ("foo" ^ s)]
+ could mutate the shared string ["foo"] -- assuming a rope-like
+ representation of strings. More generally, functions operating on
+ strings will assume shared ownership, they do not preserve unique
+ ownership. It is thus incorrect to assume unique ownership of the
+ result of [unsafe_of_string].
+
+ The only case we have reasonable confidence is safe is if the
+ produced [bytes] is shared -- used as an immutable byte
+ sequence. This is possibly useful for incremental migration of
+ low-level programs that manipulate immutable sequences of bytes
+ (for example {!Marshal.from_bytes}) and previously used the
+ [string] type for this purpose.
+*)
+
+(**/**)
+
+(* The following is for system use only. Do not call directly. *)
+
+external unsafe_get : bytes -> int -> char = "%string_unsafe_get"
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_blit :
+ bytes -> int -> bytes -> int -> int -> unit
+ = "caml_blit_string" "noalloc"
+external unsafe_fill :
+ bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc"
diff --git a/stdlib/bytesLabels.ml b/stdlib/bytesLabels.ml
new file mode 100644
index 000000000..8ec8ec9e1
--- /dev/null
+++ b/stdlib/bytesLabels.ml
@@ -0,0 +1,16 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Jacques Garrigue, Kyoto University RIMS *)
+(* *)
+(* Copyright 2001 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(* Module [BytesLabels]: labelled Bytes module *)
+
+include Bytes
diff --git a/stdlib/bytesLabels.mli b/stdlib/bytesLabels.mli
new file mode 100644
index 000000000..d48d95f5c
--- /dev/null
+++ b/stdlib/bytesLabels.mli
@@ -0,0 +1,213 @@
+(***********************************************************************)
+(* *)
+(* OCaml *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(** Byte sequence operations. *)
+
+external length : bytes -> int = "%string_length"
+(** Return the length (number of bytes) of the argument. *)
+
+external get : bytes -> int -> char = "%string_safe_get"
+(** [get s n] returns the byte at index [n] in argument [s].
+
+ Raise [Invalid_argument] if [n] not a valid index in [s]. *)
+
+
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+(** [set s n c] modifies [s] in place, replacing the byte at index [n]
+ with [c].
+
+ Raise [Invalid_argument] if [n] is not a valid index in [s]. *)
+
+external create : int -> bytes = "caml_create_string"
+(** [create n] returns a new byte sequence of length [n]. The
+ sequence is uninitialized and contains arbitrary bytes.
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val make : int -> char -> bytes
+(** [make n c] returns a new byte sequence of length [n], filled with
+ the byte [c].
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val init : int -> f:(int -> char) -> bytes
+(** [init n f] returns a fresh byte sequence of length [n],
+ with character [i] initialized to the result of [f i].
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val empty : bytes
+(** A byte sequence of size 0. *)
+
+val copy : bytes -> bytes
+(** Return a new byte sequence that contains the same bytes as the
+ argument. *)
+
+val of_string : string -> bytes
+(** Return a new byte sequence that contains the same bytes as the
+ given string. *)
+
+val to_string : bytes -> string
+(** Return a new string that contains the same bytes as the given byte
+ sequence. *)
+
+val sub : bytes -> pos:int -> len:int -> bytes
+(** [sub s start len] returns a new byte sequence of length [len],
+ containing the subsequence of [s] that starts at position [start]
+ and has length [len].
+
+ Raise [Invalid_argument] if [start] and [len] do not designate a
+ valid range of [s]. *)
+
+val sub_string : bytes -> int -> int -> string
+(** Same as [sub] but return a string instead of a byte sequence. *)
+
+val fill : bytes -> pos:int -> len:int -> char -> unit
+(** [fill s start len c] modifies [s] in place, replacing [len]
+ characters with [c], starting at [start].
+
+ Raise [Invalid_argument] if [start] and [len] do not designate a
+ valid range of [s]. *)
+
+val blit :
+ src:bytes -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int
+ -> unit
+(** [blit src srcoff dst dstoff len] copies [len] bytes from sequence
+ [src], starting at index [srcoff], to sequence [dst], starting at
+ index [dstoff]. It works correctly even if [src] and [dst] are the
+ same byte sequence, and the source and destination intervals
+ overlap.
+
+ Raise [Invalid_argument] if [srcoff] and [len] do not
+ designate a valid range of [src], or if [dstoff] and [len]
+ do not designate a valid range of [dst]. *)
+
+val concat : sep:bytes -> bytes list -> bytes
+(** [concat sep sl] concatenates the list of byte sequences [sl],
+ inserting the separator byte sequence [sep] between each, and
+ returns the result as a new byte sequence. *)
+
+val iter : f:(char -> unit) -> bytes -> unit
+(** [iter f s] applies function [f] in turn to all the bytes of [s].
+ It is equivalent to [f (get s 0); f (get s 1); ...; f (get s
+ (length s - 1)); ()]. *)
+
+val iteri : f:(int -> char -> unit) -> bytes -> unit
+(** Same as {!Bytes.iter}, but the function is applied to the index of
+ the byte as first argument and the byte itself as second
+ argument. *)
+
+val map : f:(char -> char) -> bytes -> bytes
+(** [map f s] applies function [f] in turn to all the bytes of [s] and
+ stores the resulting bytes in a new sequence that is returned as
+ the result. *)
+
+val mapi : f:(int -> char -> char) -> bytes -> bytes
+(** [mapi f s] calls [f] with each character of [s] and its
+ index (in increasing index order) and stores the resulting bytes
+ in a new sequence that is returned as the result. *)
+
+val trim : bytes -> bytes
+(** Return a copy of the argument, without leading and trailing
+ whitespace. The bytes regarded as whitespace are the ASCII
+ characters [' '], ['\012'], ['\n'], ['\r'], and ['\t']. *)
+
+val escaped : bytes -> bytes
+(** Return a copy of the argument, with special characters represented
+ by escape sequences, following the lexical conventions of OCaml. *)
+
+val index : bytes -> char -> int
+(** [index s c] returns the index of the first occurrence of byte [c]
+ in [s].
+
+ Raise [Not_found] if [c] does not occur in [s]. *)
+
+val rindex : bytes -> char -> int
+(** [rindex s c] returns the index of the last occurrence of byte [c]
+ in [s].
+
+ Raise [Not_found] if [c] does not occur in [s]. *)
+
+val index_from : bytes -> int -> char -> int
+(** [index_from s i c] returns the index of the first occurrence of
+ byte [c] in [s] after position [i]. [Bytes.index s c] is
+ equivalent to [Bytes.index_from s 0 c].
+
+ Raise [Invalid_argument] if [i] is not a valid position in [s].
+ Raise [Not_found] if [c] does not occur in [s] after position [i]. *)
+
+val rindex_from : bytes -> int -> char -> int
+(** [rindex_from s i c] returns the index of the last occurrence of
+ byte [c] in [s] before position [i+1]. [rindex s c] is equivalent
+ to [rindex_from s (Bytes.length s - 1) c].
+
+ Raise [Invalid_argument] if [i+1] is not a valid position in [s].
+ Raise [Not_found] if [c] does not occur in [s] before position [i+1]. *)
+
+val contains : bytes -> char -> bool
+(** [contains s c] tests if byte [c] appears in [s]. *)
+
+val contains_from : bytes -> int -> char -> bool
+(** [contains_from s start c] tests if byte [c] appears in [s] after
+ position [start]. [contains s c] is equivalent to [contains_from
+ s 0 c].
+
+ Raise [Invalid_argument] if [start] is not a valid position in [s]. *)
+
+val rcontains_from : bytes -> int -> char -> bool
+(** [rcontains_from s stop c] tests if byte [c] appears in [s] before
+ position [stop+1].
+
+ Raise [Invalid_argument] if [stop < 0] or [stop+1] is not a valid
+ position in [s]. *)
+
+val uppercase : bytes -> bytes
+(** Return a copy of the argument, with all lowercase letters
+ translated to uppercase, including accented letters of the ISO
+ Latin-1 (8859-1) character set. *)
+
+val lowercase : bytes -> bytes
+(** Return a copy of the argument, with all uppercase letters
+ translated to lowercase, including accented letters of the ISO
+ Latin-1 (8859-1) character set. *)
+
+val capitalize : bytes -> bytes
+(** Return a copy of the argument, with the first byte set to
+ uppercase. *)
+
+val uncapitalize : bytes -> bytes
+(** Return a copy of the argument, with the first byte set to
+ lowercase. *)
+
+type t = bytes
+(** An alias for the type of byte sequences. *)
+
+val compare: t -> t -> int
+(** The comparison function for byte sequences, with the same
+ specification as {!Pervasives.compare}. Along with the type [t],
+ this function [compare] allows the module [Bytes] to be passed as
+ argument to the functors {!Set.Make} and {!Map.Make}. *)
+
+(**/**)
+
+(* The following is for system use only. Do not call directly. *)
+
+external unsafe_get : bytes -> int -> char = "%string_unsafe_get"
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_blit :
+ src:bytes -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int ->
+ unit = "caml_blit_string" "noalloc"
+external unsafe_fill :
+ bytes -> pos:int -> len:int -> char -> unit = "caml_fill_string" "noalloc"
+val unsafe_to_string : bytes -> string
+val unsafe_of_string : string -> bytes
diff --git a/stdlib/camlinternalFormat.ml b/stdlib/camlinternalFormat.ml
new file mode 100644
index 000000000..7fb82dbe2
--- /dev/null
+++ b/stdlib/camlinternalFormat.ml
@@ -0,0 +1,2644 @@
+open CamlinternalFormatBasics
+
+(******************************************************************************)
+ (* Tools to manipulate scanning set of chars (see %[...]) *)
+
+type mutable_char_set = bytes
+
+(* Create a fresh, empty, mutable char set. *)
+let create_char_set () = Bytes.make 32 '\000'
+
+(* Add a char in a mutable char set. *)
+let add_in_char_set char_set c =
+ let ind = int_of_char c in
+ let str_ind = ind lsr 3 and mask = 1 lsl (ind land 0b111) in
+ Bytes.set char_set str_ind
+ (char_of_int (int_of_char (Bytes.get char_set str_ind) lor mask))
+
+let freeze_char_set char_set =
+ Bytes.to_string char_set
+
+(* Compute the complement of a char set. *)
+let rev_char_set char_set =
+ let char_set' = create_char_set () in
+ for i = 0 to 31 do
+ Bytes.set char_set' i
+ (char_of_int (int_of_char (String.get char_set i) lxor 0xFF));
+ done;
+ Bytes.unsafe_to_string char_set'
+
+(* Return true if a `c' is in `char_set'. *)
+let is_in_char_set char_set c =
+ let ind = int_of_char c in
+ let str_ind = ind lsr 3 and mask = 1 lsl (ind land 0b111) in
+ (int_of_char (String.get char_set str_ind) land mask) <> 0
+
+
+(******************************************************************************)
+ (* Ignored param conversion *)
+
+(* GADT used to abstract an existential type parameter. *)
+(* See param_format_of_ignored_format. *)
+type ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb = Param_format_EBB :
+ ('x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb
+
+(* Compute a padding associated to a pad_option (see "%_42d"). *)
+let pad_of_pad_opt pad_opt = match pad_opt with
+ | None -> No_padding
+ | Some width -> Lit_padding (Right, width)
+
+(* Compute a precision associated to a prec_option (see "%_.42f"). *)
+let prec_of_prec_opt prec_opt = match prec_opt with
+ | None -> No_precision
+ | Some ndec -> Lit_precision ndec
+
+(* Turn an ignored param into its equivalent not-ignored format node. *)
+(* Used for format pretty-printing and Scanf. *)
+let param_format_of_ignored_format : type a b c d e f x y .
+ (a, b, c, d, y, x) ignored -> (x, b, c, y, e, f) fmt ->
+ (a, b, c, d, e, f) param_format_ebb =
+fun ign fmt -> match ign with
+ | Ignored_char ->
+ Param_format_EBB (Char fmt)
+ | Ignored_caml_char ->
+ Param_format_EBB (Caml_char fmt)
+ | Ignored_string pad_opt ->
+ Param_format_EBB (String (pad_of_pad_opt pad_opt, fmt))
+ | Ignored_caml_string pad_opt ->
+ Param_format_EBB (Caml_string (pad_of_pad_opt pad_opt, fmt))
+ | Ignored_int (iconv, pad_opt) ->
+ Param_format_EBB (Int (iconv, pad_of_pad_opt pad_opt, No_precision, fmt))
+ | Ignored_int32 (iconv, pad_opt) ->
+ Param_format_EBB
+ (Int32 (iconv, pad_of_pad_opt pad_opt, No_precision, fmt))
+ | Ignored_nativeint (iconv, pad_opt) ->
+ Param_format_EBB
+ (Nativeint (iconv, pad_of_pad_opt pad_opt, No_precision, fmt))
+ | Ignored_int64 (iconv, pad_opt) ->
+ Param_format_EBB
+ (Int64 (iconv, pad_of_pad_opt pad_opt, No_precision, fmt))
+ | Ignored_float (pad_opt, prec_opt) ->
+ Param_format_EBB
+ (Float (Float_f, pad_of_pad_opt pad_opt, prec_of_prec_opt prec_opt, fmt))
+ | Ignored_bool ->
+ Param_format_EBB (Bool fmt)
+ | Ignored_format_arg (pad_opt, fmtty) ->
+ Param_format_EBB (Format_arg (pad_opt, fmtty, fmt))
+ | Ignored_format_subst (pad_opt, fmtty) ->
+ Param_format_EBB
+ (Format_subst (pad_opt, fmtty, fmt))
+ | Ignored_reader ->
+ Param_format_EBB (Reader fmt)
+ | Ignored_scan_char_set (width_opt, char_set) ->
+ Param_format_EBB (Scan_char_set (width_opt, char_set, fmt))
+ | Ignored_scan_get_counter counter ->
+ Param_format_EBB (Scan_get_counter (counter, fmt))
+
+
+(******************************************************************************)
+ (* Types *)
+
+type ('b, 'c) acc_formatting_gen =
+ | Acc_open_tag of ('b, 'c) acc
+ | Acc_open_box of ('b, 'c) acc
+
+(* Reversed list of printing atoms. *)
+(* Used to accumulate printf arguments. *)
+and ('b, 'c) acc =
+ | Acc_formatting_lit of ('b, 'c) acc * formatting_lit (* Special fmtting (box) *)
+ | Acc_formatting_gen of ('b, 'c) acc * ('b, 'c) acc_formatting_gen (* Special fmtting (box) *)
+ | Acc_string_literal of ('b, 'c) acc * string (* Literal string *)
+ | Acc_char_literal of ('b, 'c) acc * char (* Literal char *)
+ | Acc_data_string of ('b, 'c) acc * string (* Generated string *)
+ | Acc_data_char of ('b, 'c) acc * char (* Generated char *)
+ | Acc_delay of ('b, 'c) acc * ('b -> 'c) (* Delayed printing (%a, %t) *)
+ | Acc_flush of ('b, 'c) acc (* Flush *)
+ | Acc_invalid_arg of ('b, 'c) acc * string (* Raise Invalid_argument msg *)
+ | End_of_acc
+
+(* List of heterogeneous values. *)
+(* Used to accumulate scanf callback arguments. *)
+type ('a, 'b) heter_list =
+ | Cons : 'c * ('a, 'b) heter_list -> ('c -> 'a, 'b) heter_list
+ | Nil : ('b, 'b) heter_list
+
+(* Existential Black Boxes. *)
+(* Used to abstract some existential type parameters. *)
+
+(* GADT type associating a padding and an fmtty. *)
+(* See the type_padding function. *)
+type ('a, 'b, 'c, 'd, 'e, 'f) padding_fmtty_ebb = Padding_fmtty_EBB :
+ ('x, 'y) padding * ('y, 'b, 'c, 'd, 'e, 'f) fmtty ->
+ ('x, 'b, 'c, 'd, 'e, 'f) padding_fmtty_ebb
+
+(* GADT type associating a padding, a precision and an fmtty. *)
+(* See the type_padprec function. *)
+type ('a, 'b, 'c, 'd, 'e, 'f) padprec_fmtty_ebb = Padprec_fmtty_EBB :
+ ('x, 'y) padding * ('y, 'z) precision * ('z, 'b, 'c, 'd, 'e, 'f) fmtty ->
+ ('x, 'b, 'c, 'd, 'e, 'f) padprec_fmtty_ebb
+
+(* GADT type associating a padding and an fmt. *)
+(* See make_padding_fmt_ebb and parse_format functions. *)
+type ('a, 'b, 'c, 'e, 'f) padding_fmt_ebb = Padding_fmt_EBB :
+ (_, 'x -> 'a) padding *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'e, 'f) padding_fmt_ebb
+
+(* GADT type associating a precision and an fmt. *)
+(* See make_precision_fmt_ebb and parse_format functions. *)
+type ('a, 'b, 'c, 'e, 'f) precision_fmt_ebb = Precision_fmt_EBB :
+ (_, 'x -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'e, 'f) precision_fmt_ebb
+
+(* GADT type associating a padding, a precision and an fmt. *)
+(* See make_padprec_fmt_ebb and parse_format functions. *)
+type ('p, 'b, 'c, 'e, 'f) padprec_fmt_ebb = Padprec_fmt_EBB :
+ ('x, 'y) padding * ('y, 'p -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('p, 'b, 'c, 'e, 'f) padprec_fmt_ebb
+
+(* Abstract the 'a and 'd parameters of an fmt. *)
+(* Output type of the format parsing function. *)
+type ('b, 'c, 'e, 'f) fmt_ebb = Fmt_EBB :
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('b, 'c, 'e, 'f) fmt_ebb
+
+(* GADT type associating an fmtty and an fmt. *)
+(* See the type_format_gen function. *)
+type ('a, 'b, 'c, 'd, 'e, 'f) fmt_fmtty_ebb = Fmt_fmtty_EBB :
+ ('a, 'b, 'c, 'd, 'y, 'x) fmt *
+ ('x, 'b, 'c, 'y, 'e, 'f) fmtty ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt_fmtty_ebb
+
+(* GADT type associating an fmtty and an fmt. *)
+(* See the type_ignored_format_substitution function. *)
+type ('a, 'b, 'c, 'd, 'e, 'f) fmtty_fmt_ebb = Fmtty_fmt_EBB :
+ ('a, 'b, 'c, 'd, 'y, 'x) fmtty *
+ ('x, 'b, 'c, 'y, 'e, 'f) fmt_fmtty_ebb ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmtty_fmt_ebb
+
+(* Abstract all fmtty type parameters. *)
+(* Used to compare format types. *)
+type fmtty_ebb = Fmtty_EBB : ('a, 'b, 'c, 'd, 'e, 'f) fmtty -> fmtty_ebb
+
+(* Abstract all padding type parameters. *)
+(* Used to compare paddings. *)
+type padding_ebb = Padding_EBB : ('a, 'b) padding -> padding_ebb
+
+(* Abstract all precision type parameters. *)
+(* Used to compare precisions. *)
+type precision_ebb = Precision_EBB : ('a, 'b) precision -> precision_ebb
+
+(******************************************************************************)
+ (* Constants *)
+
+(* Default precision for float printing. *)
+let default_float_precision = 6
+
+(******************************************************************************)
+ (* Externals *)
+
+external format_float: string -> float -> string
+ = "caml_format_float"
+external format_int: string -> int -> string
+ = "caml_format_int"
+external format_int32: string -> int32 -> string
+ = "caml_int32_format"
+external format_nativeint: string -> nativeint -> string
+ = "caml_nativeint_format"
+external format_int64: string -> int64 -> string
+ = "caml_int64_format"
+
+(******************************************************************************)
+ (* Tools to pretty-print formats *)
+
+(* Type of extensible character buffers. *)
+type buffer = {
+ mutable ind : int;
+ mutable bytes : bytes;
+}
+
+(* Create a fresh buffer. *)
+let buffer_create init_size = { ind = 0; bytes = Bytes.create init_size }
+
+(* Check size of the buffer and grow it if needed. *)
+let buffer_check_size buf overhead =
+ let len = Bytes.length buf.bytes in
+ let min_len = buf.ind + overhead in
+ if min_len > len then (
+ let new_len = max (len * 2) min_len in
+ let new_str = Bytes.create new_len in
+ Bytes.blit buf.bytes 0 new_str 0 len;
+ buf.bytes <- new_str;
+ )
+
+(* Add the character `c' to the buffer `buf'. *)
+let buffer_add_char buf c =
+ buffer_check_size buf 1;
+ Bytes.set buf.bytes buf.ind c;
+ buf.ind <- buf.ind + 1
+
+(* Add the string `s' to the buffer `buf'. *)
+let buffer_add_string buf s =
+ let str_len = String.length s in
+ buffer_check_size buf str_len;
+ String.blit s 0 buf.bytes buf.ind str_len;
+ buf.ind <- buf.ind + str_len
+
+(* Get the content of the buffer. *)
+let buffer_contents buf =
+ Bytes.sub_string buf.bytes 0 buf.ind
+
+(***)
+
+(* Convert an integer conversion to char. *)
+let char_of_iconv iconv = match iconv with
+ | Int_d | Int_pd | Int_sd -> 'd' | Int_i | Int_pi | Int_si -> 'i'
+ | Int_x | Int_Cx -> 'x' | Int_X | Int_CX -> 'X' | Int_o | Int_Co -> 'o'
+ | Int_u -> 'u'
+
+(* Convert a float conversion to char. *)
+let char_of_fconv fconv = match fconv with
+ | Float_f | Float_pf | Float_sf -> 'f' | Float_e | Float_pe | Float_se -> 'e'
+ | Float_E | Float_pE | Float_sE -> 'E' | Float_g | Float_pg | Float_sg -> 'g'
+ | Float_G | Float_pG | Float_sG -> 'G' | Float_F -> 'F'
+
+(* Convert a scanning counter to char. *)
+let char_of_counter counter = match counter with
+ | Line_counter -> 'l'
+ | Char_counter -> 'n'
+ | Token_counter -> 'N'
+
+(***)
+
+(* Print a char_set in a buffer with the OCaml format lexical convention. *)
+let bprint_char_set buf char_set =
+ let rec print_start set =
+ let is_alone c =
+ let before, after = Char.(chr (code c - 1), chr (code c + 1)) in
+ is_in_char_set set c
+ && not (is_in_char_set set before && is_in_char_set set after) in
+ if is_alone ']' then buffer_add_char buf ']';
+ print_out set 1;
+ if is_alone '-' then buffer_add_char buf '-';
+ and print_out set i =
+ if i < 256 then
+ if is_in_char_set set (char_of_int i) then print_first set i
+ else print_out set (i + 1)
+ and print_first set i =
+ match char_of_int i with
+ | '\255' -> print_char buf 255;
+ | ']' | '-' -> print_out set (i + 1);
+ | _ -> print_second set (i + 1);
+ and print_second set i =
+ if is_in_char_set set (char_of_int i) then
+ match char_of_int i with
+ | '\255' ->
+ print_char buf 254;
+ print_char buf 255;
+ | ']' | '-' when not (is_in_char_set set (char_of_int (i + 1))) ->
+ print_char buf (i - 1);
+ print_out set (i + 1);
+ | _ when not (is_in_char_set set (char_of_int (i + 1))) ->
+ print_char buf (i - 1);
+ print_char buf i;
+ print_out set (i + 2);
+ | _ ->
+ print_in set (i - 1) (i + 2);
+ else (
+ print_char buf (i - 1);
+ print_out set (i + 1);
+ )
+ and print_in set i j =
+ if j = 256 || not (is_in_char_set set (char_of_int j)) then (
+ print_char buf i;
+ print_char buf (int_of_char '-');
+ print_char buf (j - 1);
+ if j < 256 then print_out set (j + 1);
+ ) else
+ print_in set i (j + 1);
+ and print_char buf i = match char_of_int i with
+ | '%' -> buffer_add_char buf '%'; buffer_add_char buf '%';
+ | '@' -> buffer_add_char buf '%'; buffer_add_char buf '@';
+ | c -> buffer_add_char buf c;
+ in
+ buffer_add_char buf '[';
+ print_start (
+ if is_in_char_set char_set '\000'
+ then ( buffer_add_char buf '^'; rev_char_set char_set )
+ else char_set
+ );
+ buffer_add_char buf ']'
+
+(***)
+
+(* Print a padty in a buffer with the format-like syntax. *)
+let bprint_padty buf padty = match padty with
+ | Left -> buffer_add_char buf '-'
+ | Right -> ()
+ | Zeros -> buffer_add_char buf '0'
+
+(* Print the '_' of an ignored flag if needed. *)
+let bprint_ignored_flag buf ign_flag =
+ if ign_flag then buffer_add_char buf '_'
+
+(***)
+
+let bprint_pad_opt buf pad_opt = match pad_opt with
+ | None -> ()
+ | Some width -> buffer_add_string buf (string_of_int width)
+
+(***)
+
+(* Print padding in a buffer with the format-like syntax. *)
+let bprint_padding : type a b . buffer -> (a, b) padding -> unit =
+fun buf pad -> match pad with
+ | No_padding -> ()
+ | Lit_padding (padty, n) ->
+ bprint_padty buf padty;
+ buffer_add_string buf (string_of_int n);
+ | Arg_padding padty ->
+ bprint_padty buf padty;
+ buffer_add_char buf '*'
+
+(* Print precision in a buffer with the format-like syntax. *)
+let bprint_precision : type a b . buffer -> (a, b) precision -> unit =
+ fun buf prec -> match prec with
+ | No_precision -> ()
+ | Lit_precision n ->
+ buffer_add_char buf '.';
+ buffer_add_string buf (string_of_int n);
+ | Arg_precision ->
+ buffer_add_string buf ".*"
+
+(***)
+
+(* Print the optionnal '+', ' ' or '#' associated to an int conversion. *)
+let bprint_iconv_flag buf iconv = match iconv with
+ | Int_pd | Int_pi -> buffer_add_char buf '+'
+ | Int_sd | Int_si -> buffer_add_char buf ' '
+ | Int_Cx | Int_CX | Int_Co -> buffer_add_char buf '#'
+ | Int_d | Int_i | Int_x | Int_X | Int_o | Int_u -> ()
+
+(* Print an complete int format in a buffer (ex: "%3.*d"). *)
+let bprint_int_fmt buf ign_flag iconv pad prec =
+ buffer_add_char buf '%';
+ bprint_ignored_flag buf ign_flag;
+ bprint_iconv_flag buf iconv;
+ bprint_padding buf pad;
+ bprint_precision buf prec;
+ buffer_add_char buf (char_of_iconv iconv)
+
+(* Print a complete int32, nativeint or int64 format in a buffer. *)
+let bprint_altint_fmt buf ign_flag iconv pad prec c =
+ buffer_add_char buf '%';
+ bprint_ignored_flag buf ign_flag;
+ bprint_iconv_flag buf iconv;
+ bprint_padding buf pad;
+ bprint_precision buf prec;
+ buffer_add_char buf c;
+ buffer_add_char buf (char_of_iconv iconv)
+
+(***)
+
+(* Print the optionnal '+' associated to a float conversion. *)
+let bprint_fconv_flag buf fconv = match fconv with
+ | Float_pf | Float_pe | Float_pE | Float_pg | Float_pG ->
+ buffer_add_char buf '+'
+ | Float_sf | Float_se | Float_sE | Float_sg | Float_sG ->
+ buffer_add_char buf ' '
+ | Float_f | Float_e | Float_E | Float_g | Float_G | Float_F ->
+ ()
+
+(* Print a complete float format in a buffer (ex: "%+*.3f"). *)
+let bprint_float_fmt buf ign_flag fconv pad prec =
+ buffer_add_char buf '%';
+ bprint_ignored_flag buf ign_flag;
+ bprint_fconv_flag buf fconv;
+ bprint_padding buf pad;
+ bprint_precision buf prec;
+ buffer_add_char buf (char_of_fconv fconv)
+
+(* Compute the literal string representation of a formatting_lit. *)
+(* Also used by Printf and Scanf where formatting is not interpreted. *)
+let string_of_formatting_lit formatting_lit = match formatting_lit with
+ | Close_box -> "@]"
+ | Close_tag -> "@}"
+ | Break (str, _, _) -> str
+ | FFlush -> "@?"
+ | Force_newline -> "@\n"
+ | Flush_newline -> "@."
+ | Magic_size (str, _) -> str
+ | Escaped_at -> "@@"
+ | Escaped_percent -> "@%"
+ | Scan_indic c -> "@" ^ (String.make 1 c)
+
+(* Compute the literal string representation of a formatting. *)
+(* Also used by Printf and Scanf where formatting is not interpreted. *)
+let string_of_formatting_gen : type a b c d e f .
+ (a, b, c, d, e, f) formatting_gen -> string =
+ fun formatting_gen -> match formatting_gen with
+ | Open_tag (Format (_, str)) -> str
+ | Open_box (Format (_, str)) -> str
+
+(***)
+
+(* Print a literal char in a buffer, escape '%' by "%%". *)
+let bprint_char_literal buf chr = match chr with
+ | '%' -> buffer_add_string buf "%%"
+ | _ -> buffer_add_char buf chr
+
+(* Print a literal string in a buffer, escape all '%' by "%%". *)
+let bprint_string_literal buf str =
+ for i = 0 to String.length str - 1 do
+ bprint_char_literal buf str.[i]
+ done
+
+(******************************************************************************)
+ (* Format pretty-printing *)
+
+(* Print a complete format type (an fmtty) in a buffer. *)
+let rec bprint_fmtty : type a b c d e f g h i j k l .
+ buffer -> (a, b, c, d, e, f, g, h, i, j, k, l) fmtty_rel -> unit =
+fun buf fmtty -> match fmtty with
+ | Char_ty rest -> buffer_add_string buf "%c"; bprint_fmtty buf rest;
+ | String_ty rest -> buffer_add_string buf "%s"; bprint_fmtty buf rest;
+ | Int_ty rest -> buffer_add_string buf "%i"; bprint_fmtty buf rest;
+ | Int32_ty rest -> buffer_add_string buf "%li"; bprint_fmtty buf rest;
+ | Nativeint_ty rest -> buffer_add_string buf "%ni"; bprint_fmtty buf rest;
+ | Int64_ty rest -> buffer_add_string buf "%Li"; bprint_fmtty buf rest;
+ | Float_ty rest -> buffer_add_string buf "%f"; bprint_fmtty buf rest;
+ | Bool_ty rest -> buffer_add_string buf "%B"; bprint_fmtty buf rest;
+ | Alpha_ty rest -> buffer_add_string buf "%a"; bprint_fmtty buf rest;
+ | Theta_ty rest -> buffer_add_string buf "%t"; bprint_fmtty buf rest;
+ | Reader_ty rest -> buffer_add_string buf "%r"; bprint_fmtty buf rest;
+
+ | Ignored_reader_ty rest ->
+ buffer_add_string buf "%_r";
+ bprint_fmtty buf rest;
+
+ | Format_arg_ty (sub_fmtty, rest) ->
+ buffer_add_string buf "%{"; bprint_fmtty buf sub_fmtty;
+ buffer_add_string buf "%}"; bprint_fmtty buf rest;
+ | Format_subst_ty (sub_fmtty, _, rest) ->
+ buffer_add_string buf "%("; bprint_fmtty buf sub_fmtty;
+ buffer_add_string buf "%)"; bprint_fmtty buf rest;
+
+ | End_of_fmtty -> ()
+
+(***)
+
+(* Print a complete format in a buffer. *)
+let bprint_fmt buf fmt =
+ let rec fmtiter : type a b c d e f .
+ (a, b, c, d, e, f) fmt -> bool -> unit =
+ fun fmt ign_flag -> match fmt with
+ | String (pad, rest) ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ bprint_padding buf pad; buffer_add_char buf 's';
+ fmtiter rest false;
+ | Caml_string (pad, rest) ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ bprint_padding buf pad; buffer_add_char buf 'S';
+ fmtiter rest false;
+
+ | Int (iconv, pad, prec, rest) ->
+ bprint_int_fmt buf ign_flag iconv pad prec;
+ fmtiter rest false;
+ | Int32 (iconv, pad, prec, rest) ->
+ bprint_altint_fmt buf ign_flag iconv pad prec 'l';
+ fmtiter rest false;
+ | Nativeint (iconv, pad, prec, rest) ->
+ bprint_altint_fmt buf ign_flag iconv pad prec 'n';
+ fmtiter rest false;
+ | Int64 (iconv, pad, prec, rest) ->
+ bprint_altint_fmt buf ign_flag iconv pad prec 'L';
+ fmtiter rest false;
+ | Float (fconv, pad, prec, rest) ->
+ bprint_float_fmt buf ign_flag fconv pad prec;
+ fmtiter rest false;
+
+ | Char rest ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ buffer_add_char buf 'c'; fmtiter rest false;
+ | Caml_char rest ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ buffer_add_char buf 'C'; fmtiter rest false;
+ | Bool rest ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ buffer_add_char buf 'B'; fmtiter rest false;
+ | Alpha rest ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ buffer_add_char buf 'a'; fmtiter rest false;
+ | Theta rest ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ buffer_add_char buf 't'; fmtiter rest false;
+ | Reader rest ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ buffer_add_char buf 'r'; fmtiter rest false;
+ | Flush rest ->
+ buffer_add_string buf "%!";
+ fmtiter rest ign_flag;
+
+ | String_literal (str, rest) ->
+ bprint_string_literal buf str;
+ fmtiter rest ign_flag;
+ | Char_literal (chr, rest) ->
+ bprint_char_literal buf chr;
+ fmtiter rest ign_flag;
+
+ | Format_arg (pad_opt, fmtty, rest) ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ bprint_pad_opt buf pad_opt; buffer_add_char buf '{';
+ bprint_fmtty buf fmtty; buffer_add_char buf '%'; buffer_add_char buf '}';
+ fmtiter rest false;
+ | Format_subst (pad_opt, fmtty, rest) ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ bprint_pad_opt buf pad_opt; buffer_add_char buf '(';
+ bprint_fmtty buf fmtty; buffer_add_char buf '%'; buffer_add_char buf ')';
+ fmtiter rest false;
+
+ | Scan_char_set (width_opt, char_set, rest) ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ bprint_pad_opt buf width_opt; bprint_char_set buf char_set;
+ fmtiter rest false;
+ | Scan_get_counter (counter, rest) ->
+ buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag;
+ buffer_add_char buf (char_of_counter counter);
+ fmtiter rest false;
+ | Ignored_param (ign, rest) ->
+ let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in
+ fmtiter fmt' true;
+
+ | Formatting_lit (fmting_lit, rest) ->
+ bprint_string_literal buf (string_of_formatting_lit fmting_lit);
+ fmtiter rest ign_flag;
+ | Formatting_gen (fmting_gen, rest) ->
+ bprint_string_literal buf "@{";
+ bprint_string_literal buf (string_of_formatting_gen fmting_gen);
+ fmtiter rest ign_flag;
+
+ | End_of_format -> ()
+
+ in fmtiter fmt false
+
+(***)
+
+(* Convert a format to string. *)
+let string_of_fmt fmt =
+ let buf = buffer_create 16 in
+ bprint_fmt buf fmt;
+ buffer_contents buf
+
+(******************************************************************************)
+ (* Type extraction *)
+
+type (_, _) eq = Refl : ('a, 'a) eq
+
+(* Invariant: this function is the identity on values.
+
+ In particular, if (ty1, ty2) have equal values, then
+ (trans (symm ty1) ty2) respects the 'trans' precondition. *)
+let rec symm : type a1 b1 c1 d1 e1 f1 a2 b2 c2 d2 e2 f2 .
+ (a1, b1, c1, d1, e1, f1,
+ a2, b2, c2, d2, e2, f2) fmtty_rel
+-> (a2, b2, c2, d2, e2, f2,
+ a1, b1, c1, d1, e1, f1) fmtty_rel
+= function
+ | Char_ty rest -> Char_ty (symm rest)
+ | Int_ty rest -> Int_ty (symm rest)
+ | Int32_ty rest -> Int32_ty (symm rest)
+ | Int64_ty rest -> Int64_ty (symm rest)
+ | Nativeint_ty rest -> Nativeint_ty (symm rest)
+ | Float_ty rest -> Float_ty (symm rest)
+ | Bool_ty rest -> Bool_ty (symm rest)
+ | String_ty rest -> String_ty (symm rest)
+ | Theta_ty rest -> Theta_ty (symm rest)
+ | Alpha_ty rest -> Alpha_ty (symm rest)
+ | Reader_ty rest -> Reader_ty (symm rest)
+ | Ignored_reader_ty rest -> Ignored_reader_ty (symm rest)
+ | Format_arg_ty (ty, rest) ->
+ Format_arg_ty (ty, symm rest)
+ | Format_subst_ty (ty1, ty2, rest) ->
+ Format_subst_ty (ty2, ty1, symm rest)
+ | End_of_fmtty -> End_of_fmtty
+
+let rec fmtty_rel_det : type a1 b c d1 e1 f1 a2 d2 e2 f2 .
+ (a1, b, c, d1, e1, f1,
+ a2, b, c, d2, e2, f2) fmtty_rel ->
+ ((f1, f2) eq -> (a1, a2) eq)
+ * ((a1, a2) eq -> (f1, f2) eq)
+ * ((e1, e2) eq -> (d1, d2) eq)
+ * ((d1, d2) eq -> (e1, e2) eq)
+= function
+ | End_of_fmtty ->
+ (fun Refl -> Refl),
+ (fun Refl -> Refl),
+ (fun Refl -> Refl),
+ (fun Refl -> Refl)
+ | Char_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | String_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Int_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Int32_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Int64_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Nativeint_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Float_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Bool_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+
+ | Theta_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Alpha_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Reader_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ (fun Refl -> let Refl = ed Refl in Refl),
+ (fun Refl -> let Refl = de Refl in Refl)
+ | Ignored_reader_ty rest ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ (fun Refl -> let Refl = ed Refl in Refl),
+ (fun Refl -> let Refl = de Refl in Refl)
+ | Format_arg_ty (_ty, rest) ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ (fun Refl -> let Refl = fa Refl in Refl),
+ (fun Refl -> let Refl = af Refl in Refl),
+ ed, de
+ | Format_subst_ty (ty1, ty2, rest) ->
+ let fa, af, ed, de = fmtty_rel_det rest in
+ let ty = trans (symm ty1) ty2 in
+ let ag, ga, dj, jd = fmtty_rel_det ty in
+ (fun Refl -> let Refl = fa Refl in let Refl = ag Refl in Refl),
+ (fun Refl -> let Refl = ga Refl in let Refl = af Refl in Refl),
+ (fun Refl -> let Refl = ed Refl in let Refl = dj Refl in Refl),
+ (fun Refl -> let Refl = jd Refl in let Refl = de Refl in Refl)
+
+(* Precondition: we assume that the two fmtty_rel arguments have equal
+ values (at possibly distinct types); this invariant comes from the way
+ fmtty_rel witnesses are produced by the type-checker
+
+ The code below uses (assert false) when this assumption is broken. The
+ code pattern is the following:
+
+ | Foo x, Foo y ->
+ (* case where indeed both values
+ start with constructor Foo *)
+ | Foo _, _
+ | _, Foo _ ->
+ (* different head constructors: broken precondition *)
+ assert false
+*)
+and trans : type
+ a1 b1 c1 d1 e1 f1
+ a2 b2 c2 d2 e2 f2
+ a3 b3 c3 d3 e3 f3
+.
+ (a1, b1, c1, d1, e1, f1,
+ a2, b2, c2, d2, e2, f2) fmtty_rel
+-> (a2, b2, c2, d2, e2, f2,
+ a3, b3, c3, d3, e3, f3) fmtty_rel
+-> (a1, b1, c1, d1, e1, f1,
+ a3, b3, c3, d3, e3, f3) fmtty_rel
+= fun ty1 ty2 -> match ty1, ty2 with
+ | Char_ty rest1, Char_ty rest2 -> Char_ty (trans rest1 rest2)
+ | String_ty rest1, String_ty rest2 -> String_ty (trans rest1 rest2)
+ | Bool_ty rest1, Bool_ty rest2 -> Bool_ty (trans rest1 rest2)
+ | Int_ty rest1, Int_ty rest2 -> Int_ty (trans rest1 rest2)
+ | Int32_ty rest1, Int32_ty rest2 -> Int32_ty (trans rest1 rest2)
+ | Int64_ty rest1, Int64_ty rest2 -> Int64_ty (trans rest1 rest2)
+ | Nativeint_ty rest1, Nativeint_ty rest2 -> Nativeint_ty (trans rest1 rest2)
+ | Float_ty rest1, Float_ty rest2 -> Float_ty (trans rest1 rest2)
+
+ | Alpha_ty rest1, Alpha_ty rest2 -> Alpha_ty (trans rest1 rest2)
+ | Alpha_ty _, _ -> assert false
+ | _, Alpha_ty _ -> assert false
+
+ | Theta_ty rest1, Theta_ty rest2 -> Theta_ty (trans rest1 rest2)
+ | Theta_ty _, _ -> assert false
+ | _, Theta_ty _ -> assert false
+
+ | Reader_ty rest1, Reader_ty rest2 -> Reader_ty (trans rest1 rest2)
+ | Reader_ty _, _ -> assert false
+ | _, Reader_ty _ -> assert false
+
+ | Ignored_reader_ty rest1, Ignored_reader_ty rest2 ->
+ Ignored_reader_ty (trans rest1 rest2)
+ | Ignored_reader_ty _, _ -> assert false
+ | _, Ignored_reader_ty _ -> assert false
+
+ | Format_arg_ty (ty1, rest1), Format_arg_ty (ty2, rest2) ->
+ Format_arg_ty (trans ty1 ty2, trans rest1 rest2)
+ | Format_arg_ty _, _ -> assert false
+ | _, Format_arg_ty _ -> assert false
+
+ | Format_subst_ty (ty11, ty12, rest1),
+ Format_subst_ty (ty21, ty22, rest2) ->
+ let ty = trans (symm ty12) ty21 in
+ let _, f2, _, f4 = fmtty_rel_det ty in
+ let Refl = f2 Refl in
+ let Refl = f4 Refl in
+ Format_subst_ty (ty11, ty22, trans rest1 rest2)
+ | Format_subst_ty _, _ -> assert false
+ | _, Format_subst_ty _ -> assert false
+
+ | End_of_fmtty, End_of_fmtty -> End_of_fmtty
+ | End_of_fmtty, _ -> assert false
+ | _, End_of_fmtty -> assert false
+
+let rec fmtty_of_formatting_gen : type a b c d e f .
+ (a, b, c, d, e, f) formatting_gen ->
+ (a, b, c, d, e, f) fmtty =
+fun formatting_gen -> match formatting_gen with
+ | Open_tag (Format (fmt, _)) -> fmtty_of_fmt fmt
+ | Open_box (Format (fmt, _)) -> fmtty_of_fmt fmt
+
+(* Extract the type representation (an fmtty) of a format. *)
+and fmtty_of_fmt : type a b c d e f .
+ (a, b, c, d, e, f) fmt -> (a, b, c, d, e, f) fmtty =
+fun fmtty -> match fmtty with
+ | String (pad, rest) ->
+ fmtty_of_padding_fmtty pad (String_ty (fmtty_of_fmt rest))
+ | Caml_string (pad, rest) ->
+ fmtty_of_padding_fmtty pad (String_ty (fmtty_of_fmt rest))
+
+ | Int (_, pad, prec, rest) ->
+ let ty_rest = fmtty_of_fmt rest in
+ let prec_ty = fmtty_of_precision_fmtty prec (Int_ty ty_rest) in
+ fmtty_of_padding_fmtty pad prec_ty
+ | Int32 (_, pad, prec, rest) ->
+ let ty_rest = fmtty_of_fmt rest in
+ let prec_ty = fmtty_of_precision_fmtty prec (Int32_ty ty_rest) in
+ fmtty_of_padding_fmtty pad prec_ty
+ | Nativeint (_, pad, prec, rest) ->
+ let ty_rest = fmtty_of_fmt rest in
+ let prec_ty = fmtty_of_precision_fmtty prec (Nativeint_ty ty_rest) in
+ fmtty_of_padding_fmtty pad prec_ty
+ | Int64 (_, pad, prec, rest) ->
+ let ty_rest = fmtty_of_fmt rest in
+ let prec_ty = fmtty_of_precision_fmtty prec (Int64_ty ty_rest) in
+ fmtty_of_padding_fmtty pad prec_ty
+ | Float (_, pad, prec, rest) ->
+ let ty_rest = fmtty_of_fmt rest in
+ let prec_ty = fmtty_of_precision_fmtty prec (Float_ty ty_rest) in
+ fmtty_of_padding_fmtty pad prec_ty
+
+ | Char rest -> Char_ty (fmtty_of_fmt rest)
+ | Caml_char rest -> Char_ty (fmtty_of_fmt rest)
+ | Bool rest -> Bool_ty (fmtty_of_fmt rest)
+ | Alpha rest -> Alpha_ty (fmtty_of_fmt rest)
+ | Theta rest -> Theta_ty (fmtty_of_fmt rest)
+ | Reader rest -> Reader_ty (fmtty_of_fmt rest)
+
+ | Format_arg (_, ty, rest) ->
+ Format_arg_ty (ty, fmtty_of_fmt rest)
+ | Format_subst (_, ty, rest) ->
+ Format_subst_ty (ty, ty, fmtty_of_fmt rest)
+
+ | Flush rest -> fmtty_of_fmt rest
+ | String_literal (_, rest) -> fmtty_of_fmt rest
+ | Char_literal (_, rest) -> fmtty_of_fmt rest
+
+ | Scan_char_set (_, _, rest) -> String_ty (fmtty_of_fmt rest)
+ | Scan_get_counter (_, rest) -> Int_ty (fmtty_of_fmt rest)
+ | Ignored_param (ign, rest) -> fmtty_of_ignored_format ign rest
+ | Formatting_lit (_, rest) -> fmtty_of_fmt rest
+ | Formatting_gen (fmting_gen, rest) ->
+ concat_fmtty (fmtty_of_formatting_gen fmting_gen) (fmtty_of_fmt rest)
+
+ | End_of_format -> End_of_fmtty
+
+(* Extract the fmtty of an ignored parameter followed by the rest of
+ the format. *)
+and fmtty_of_ignored_format : type x y a b c d e f .
+ (a, b, c, d, y, x) ignored ->
+ (x, b, c, y, e, f) fmt ->
+ (a, b, c, d, e, f) fmtty =
+fun ign fmt -> match ign with
+ | Ignored_char -> fmtty_of_fmt fmt
+ | Ignored_caml_char -> fmtty_of_fmt fmt
+ | Ignored_string _ -> fmtty_of_fmt fmt
+ | Ignored_caml_string _ -> fmtty_of_fmt fmt
+ | Ignored_int (_, _) -> fmtty_of_fmt fmt
+ | Ignored_int32 (_, _) -> fmtty_of_fmt fmt
+ | Ignored_nativeint (_, _) -> fmtty_of_fmt fmt
+ | Ignored_int64 (_, _) -> fmtty_of_fmt fmt
+ | Ignored_float (_, _) -> fmtty_of_fmt fmt
+ | Ignored_bool -> fmtty_of_fmt fmt
+ | Ignored_format_arg _ -> fmtty_of_fmt fmt
+ | Ignored_format_subst (_, fmtty) -> concat_fmtty fmtty (fmtty_of_fmt fmt)
+ | Ignored_reader -> Ignored_reader_ty (fmtty_of_fmt fmt)
+ | Ignored_scan_char_set _ -> fmtty_of_fmt fmt
+ | Ignored_scan_get_counter _ -> fmtty_of_fmt fmt
+
+(* Add an Int_ty node if padding is taken as an extra argument (ex: "%*s"). *)
+and fmtty_of_padding_fmtty : type x a b c d e f .
+ (x, a) padding -> (a, b, c, d, e, f) fmtty -> (x, b, c, d, e, f) fmtty =
+ fun pad fmtty -> match pad with
+ | No_padding -> fmtty
+ | Lit_padding _ -> fmtty
+ | Arg_padding _ -> Int_ty fmtty
+
+(* Add an Int_ty node if precision is taken as an extra argument (ex: "%.*f").*)
+and fmtty_of_precision_fmtty : type x a b c d e f .
+ (x, a) precision -> (a, b, c, d, e, f) fmtty -> (x, b, c, d, e, f) fmtty =
+ fun prec fmtty -> match prec with
+ | No_precision -> fmtty
+ | Lit_precision _ -> fmtty
+ | Arg_precision -> Int_ty fmtty
+
+(******************************************************************************)
+ (* Format typing *)
+
+(* Exception raised when a format does not match a given format type. *)
+exception Type_mismatch
+
+(* Type a padding. *)
+(* Take an Int_ty from the fmtty if the integer should be kept as argument. *)
+(* Raise Type_mismatch in case of type mismatch. *)
+let type_padding : type a b c d e f x y .
+ (x, y) padding -> (a, b, c, d, e, f) fmtty ->
+ (a, b, c, d, e, f) padding_fmtty_ebb =
+fun pad fmtty -> match pad, fmtty with
+ | No_padding, _ -> Padding_fmtty_EBB (No_padding, fmtty)
+ | Lit_padding (padty, w), _ -> Padding_fmtty_EBB (Lit_padding (padty,w),fmtty)
+ | Arg_padding padty, Int_ty rest -> Padding_fmtty_EBB (Arg_padding padty,rest)
+ | _ -> raise Type_mismatch
+
+(* Convert a (upadding, uprecision) to a (padding, precision). *)
+(* Take one or two Int_ty from the fmtty if needed. *)
+(* Raise Type_mismatch in case of type mismatch. *)
+let type_padprec : type a b c d e f x y z .
+ (x, y) padding -> (y, z) precision -> (a, b, c, d, e, f) fmtty ->
+ (a, b, c, d, e, f) padprec_fmtty_ebb =
+fun pad prec fmtty -> match prec, type_padding pad fmtty with
+ | No_precision, Padding_fmtty_EBB (pad, rest) ->
+ Padprec_fmtty_EBB (pad, No_precision, rest)
+ | Lit_precision p, Padding_fmtty_EBB (pad, rest) ->
+ Padprec_fmtty_EBB (pad, Lit_precision p, rest)
+ | Arg_precision, Padding_fmtty_EBB (pad, Int_ty rest) ->
+ Padprec_fmtty_EBB (pad, Arg_precision, rest)
+ | _, Padding_fmtty_EBB (_, _) -> raise Type_mismatch
+
+(* Type a format according to an fmtty. *)
+(* If typing succeed, generate a copy of the format with the same
+ type parameters as the fmtty. *)
+(* Raise a Failure with an error message in case of type mismatch. *)
+let rec type_format :
+ type a1 b1 c1 d1 e1 f1
+ a2 b2 c2 d2 e2 f2 .
+ (a1, b1, c1, d1, e1, f1) fmt
+ -> (a2, b2, c2, d2, e2, f2) fmtty
+ -> (a2, b2, c2, d2, e2, f2) fmt
+= fun fmt fmtty -> match type_format_gen fmt fmtty with
+ | Fmt_fmtty_EBB (fmt', End_of_fmtty) -> fmt'
+ | _ -> raise Type_mismatch
+
+and type_format_gen :
+ type a1 b1 c1 d1 e1 f1
+ a2 b2 c2 d2 e2 f2 .
+ (a1, b1, c1, d1, e1, f1) fmt
+ -> (a2, b2, c2, d2, e2, f2) fmtty
+ -> (a2, b2, c2, d2, e2, f2) fmt_fmtty_ebb
+= fun fmt fmtty -> match fmt, fmtty with
+ | Char fmt_rest, Char_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Char fmt', fmtty')
+ | Caml_char fmt_rest, Char_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Caml_char fmt', fmtty')
+ | String (pad, fmt_rest), _ -> (
+ match type_padding pad fmtty with
+ | Padding_fmtty_EBB (pad, String_ty fmtty_rest) ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (String (pad, fmt'), fmtty')
+ | Padding_fmtty_EBB (_, _) -> raise Type_mismatch
+ )
+ | Caml_string (pad, fmt_rest), _ -> (
+ match type_padding pad fmtty with
+ | Padding_fmtty_EBB (pad, String_ty fmtty_rest) ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Caml_string (pad, fmt'), fmtty')
+ | Padding_fmtty_EBB (_, _) -> raise Type_mismatch
+ )
+ | Int (iconv, pad, prec, fmt_rest), _ -> (
+ match type_padprec pad prec fmtty with
+ | Padprec_fmtty_EBB (pad, prec, Int_ty fmtty_rest) ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Int (iconv, pad, prec, fmt'), fmtty')
+ | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch
+ )
+ | Int32 (iconv, pad, prec, fmt_rest), _ -> (
+ match type_padprec pad prec fmtty with
+ | Padprec_fmtty_EBB (pad, prec, Int32_ty fmtty_rest) ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Int32 (iconv, pad, prec, fmt'), fmtty')
+ | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch
+ )
+ | Nativeint (iconv, pad, prec, fmt_rest), _ -> (
+ match type_padprec pad prec fmtty with
+ | Padprec_fmtty_EBB (pad, prec, Nativeint_ty fmtty_rest) ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Nativeint (iconv, pad, prec, fmt'), fmtty')
+ | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch
+ )
+ | Int64 (iconv, pad, prec, fmt_rest), _ -> (
+ match type_padprec pad prec fmtty with
+ | Padprec_fmtty_EBB (pad, prec, Int64_ty fmtty_rest) ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Int64 (iconv, pad, prec, fmt'), fmtty')
+ | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch
+ )
+ | Float (fconv, pad, prec, fmt_rest), _ -> (
+ match type_padprec pad prec fmtty with
+ | Padprec_fmtty_EBB (pad, prec, Float_ty fmtty_rest) ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Float (fconv, pad, prec, fmt'), fmtty')
+ | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch
+ )
+ | Bool fmt_rest, Bool_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Bool fmt', fmtty')
+ | Flush fmt_rest, fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Flush fmt', fmtty')
+
+ | String_literal (str, fmt_rest), fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (String_literal (str, fmt'), fmtty')
+ | Char_literal (chr, fmt_rest), fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Char_literal (chr, fmt'), fmtty')
+
+ | Format_arg (pad_opt, sub_fmtty, fmt_rest),
+ Format_arg_ty (sub_fmtty', fmtty_rest) ->
+ if Fmtty_EBB sub_fmtty <> Fmtty_EBB sub_fmtty' then raise Type_mismatch;
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Format_arg (pad_opt, sub_fmtty', fmt'), fmtty')
+ | Format_subst (pad_opt, sub_fmtty, fmt_rest),
+ Format_subst_ty (sub_fmtty1, _sub_fmtty2, fmtty_rest) ->
+ if Fmtty_EBB (erase_rel sub_fmtty) <> Fmtty_EBB (erase_rel sub_fmtty1) then
+ raise Type_mismatch;
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest (erase_rel fmtty_rest) in
+ Fmt_fmtty_EBB (Format_subst (pad_opt, sub_fmtty1, fmt'), fmtty')
+ (* Printf and Format specific constructors: *)
+ | Alpha fmt_rest, Alpha_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Alpha fmt', fmtty')
+ | Theta fmt_rest, Theta_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Theta fmt', fmtty')
+
+ (* Format specific constructors: *)
+ | Formatting_lit (formatting_lit, fmt_rest), fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Formatting_lit (formatting_lit, fmt'), fmtty')
+ | Formatting_gen (formatting_gen, fmt_rest), fmtty_rest ->
+ type_formatting_gen formatting_gen fmt_rest fmtty_rest
+
+ (* Scanf specific constructors: *)
+ | Reader fmt_rest, Reader_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Reader fmt', fmtty')
+ | Scan_char_set (width_opt, char_set, fmt_rest), String_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Scan_char_set (width_opt, char_set, fmt'), fmtty')
+ | Scan_get_counter (counter, fmt_rest), Int_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in
+ Fmt_fmtty_EBB (Scan_get_counter (counter, fmt'), fmtty')
+ | Ignored_param (ign, rest), fmtty_rest ->
+ type_ignored_param ign rest fmtty_rest
+
+ | End_of_format, fmtty_rest -> Fmt_fmtty_EBB (End_of_format, fmtty_rest)
+
+ | _ -> raise Type_mismatch
+
+and type_formatting_gen : type a1 a3 b1 b3 c1 c3 d1 d3 e1 e2 e3 f1 f2 f3 .
+ (a1, b1, c1, d1, e1, f1) formatting_gen ->
+ (f1, b1, c1, e1, e2, f2) fmt ->
+ (a3, b3, c3, d3, e3, f3) fmtty ->
+ (a3, b3, c3, d3, e3, f3) fmt_fmtty_ebb =
+fun formatting_gen fmt0 fmtty0 -> match formatting_gen with
+ | Open_tag (Format (fmt1, str)) ->
+ let Fmt_fmtty_EBB (fmt2, fmtty2) = type_format_gen fmt1 fmtty0 in
+ let Fmt_fmtty_EBB (fmt3, fmtty3) = type_format_gen fmt0 fmtty2 in
+ Fmt_fmtty_EBB (Formatting_gen (Open_tag (Format (fmt2, str)), fmt3), fmtty3)
+ | Open_box (Format (fmt1, str)) ->
+ let Fmt_fmtty_EBB (fmt2, fmtty2) = type_format_gen fmt1 fmtty0 in
+ let Fmt_fmtty_EBB (fmt3, fmtty3) = type_format_gen fmt0 fmtty2 in
+ Fmt_fmtty_EBB (Formatting_gen (Open_tag (Format (fmt2, str)), fmt3), fmtty3)
+
+(* Type an Ignored_param node according to an fmtty. *)
+and type_ignored_param : type p q x y z t u v a b c d e f .
+ (x, y, z, t, q, p) ignored ->
+ (p, y, z, q, u, v) fmt ->
+ (a, b, c, d, e, f) fmtty ->
+ (a, b, c, d, e, f) fmt_fmtty_ebb =
+fun ign fmt fmtty -> match ign with
+ | Ignored_char as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_caml_char as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_string _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_caml_string _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_int _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_int32 _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_nativeint _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_int64 _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_float _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_bool as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_scan_char_set _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_scan_get_counter _ as ign' -> type_ignored_param_one ign' fmt fmtty
+ | Ignored_format_arg (pad_opt, sub_fmtty) ->
+ type_ignored_param_one (Ignored_format_arg (pad_opt, sub_fmtty)) fmt fmtty
+ | Ignored_format_subst (pad_opt, sub_fmtty) ->
+ let Fmtty_fmt_EBB (sub_fmtty', Fmt_fmtty_EBB (fmt', fmtty')) =
+ type_ignored_format_substitution sub_fmtty fmt fmtty in
+ Fmt_fmtty_EBB (Ignored_param (Ignored_format_subst (pad_opt, sub_fmtty'), fmt'), fmtty')
+ | Ignored_reader -> (
+ match fmtty with
+ | Ignored_reader_ty fmtty_rest ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt fmtty_rest in
+ Fmt_fmtty_EBB (Ignored_param (Ignored_reader, fmt'), fmtty')
+ | _ -> raise Type_mismatch
+ )
+
+and type_ignored_param_one : type a1 a2 b1 b2 c1 c2 d1 d2 e1 e2 f1 f2 .
+ (a2, b2, c2, d2, d2, a2) ignored ->
+ (a1, b1, c1, d1, e1, f1) fmt ->
+ (a2, b2, c2, d2, e2, f2) fmtty ->
+ (a2, b2, c2, d2, e2, f2) fmt_fmtty_ebb
+= fun ign fmt fmtty ->
+ let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt fmtty in
+ Fmt_fmtty_EBB (Ignored_param (ign, fmt'), fmtty')
+
+(* Typing of the complex case: "%_(...%)". *)
+and type_ignored_format_substitution : type w x y z p s t u a b c d e f .
+ (w, x, y, z, s, p) fmtty ->
+ (p, x, y, s, t, u) fmt ->
+ (a, b, c, d, e, f) fmtty -> (a, b, c, d, e, f) fmtty_fmt_ebb =
+fun sub_fmtty fmt fmtty -> match sub_fmtty, fmtty with
+ | Char_ty sub_fmtty_rest, Char_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Char_ty sub_fmtty_rest', fmt')
+ | String_ty sub_fmtty_rest, String_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (String_ty sub_fmtty_rest', fmt')
+ | Int_ty sub_fmtty_rest, Int_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Int_ty sub_fmtty_rest', fmt')
+ | Int32_ty sub_fmtty_rest, Int32_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Int32_ty sub_fmtty_rest', fmt')
+ | Nativeint_ty sub_fmtty_rest, Nativeint_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Nativeint_ty sub_fmtty_rest', fmt')
+ | Int64_ty sub_fmtty_rest, Int64_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Int64_ty sub_fmtty_rest', fmt')
+ | Float_ty sub_fmtty_rest, Float_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Float_ty sub_fmtty_rest', fmt')
+ | Bool_ty sub_fmtty_rest, Bool_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Bool_ty sub_fmtty_rest', fmt')
+ | Alpha_ty sub_fmtty_rest, Alpha_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Alpha_ty sub_fmtty_rest', fmt')
+ | Theta_ty sub_fmtty_rest, Theta_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Theta_ty sub_fmtty_rest', fmt')
+ | Reader_ty sub_fmtty_rest, Reader_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Reader_ty sub_fmtty_rest', fmt')
+ | Ignored_reader_ty sub_fmtty_rest, Ignored_reader_ty fmtty_rest ->
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Ignored_reader_ty sub_fmtty_rest', fmt')
+
+ | Format_arg_ty (sub2_fmtty, sub_fmtty_rest),
+ Format_arg_ty (sub2_fmtty', fmtty_rest) ->
+ if Fmtty_EBB sub2_fmtty <> Fmtty_EBB sub2_fmtty' then raise Type_mismatch;
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in
+ Fmtty_fmt_EBB (Format_arg_ty (sub2_fmtty', sub_fmtty_rest'), fmt')
+ | Format_subst_ty (sub1_fmtty, sub2_fmtty, sub_fmtty_rest),
+ Format_subst_ty (sub1_fmtty', sub2_fmtty', fmtty_rest) ->
+ (* TODO define Fmtty_rel_EBB to remove those erase_rel *)
+ if Fmtty_EBB (erase_rel sub1_fmtty) <> Fmtty_EBB (erase_rel sub1_fmtty') then raise Type_mismatch;
+ if Fmtty_EBB (erase_rel sub2_fmtty) <> Fmtty_EBB (erase_rel sub2_fmtty') then raise Type_mismatch;
+ let sub_fmtty' = trans (symm sub1_fmtty') sub2_fmtty' in
+ let _, f2, _, f4 = fmtty_rel_det sub_fmtty' in
+ let Refl = f2 Refl in
+ let Refl = f4 Refl in
+ let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') =
+ type_ignored_format_substitution (erase_rel sub_fmtty_rest) fmt fmtty_rest in
+ Fmtty_fmt_EBB (Format_subst_ty (sub1_fmtty', sub2_fmtty', symm sub_fmtty_rest'), fmt')
+ | End_of_fmtty, fmtty ->
+ Fmtty_fmt_EBB (End_of_fmtty, type_format_gen fmt fmtty)
+ | _ -> raise Type_mismatch
+
+(* This implementation of `recast` is a bit disappointing. The
+ invariant provided by the type are very strong: the input format's
+ type is in relation to the output type's as witnessed by the
+ fmtty_rel argument. One would at first expect this function to be
+ total, and implementable by exhaustive pattern matching. Instead,
+ we reuse the highly partial and much less well-defined function
+ `type_format` that has lost all knowledge of the correspondence
+ between the argument's types.
+
+ Besides the fact that this function reuses a lot of the
+ `type_format` logic (eg.: seeing Int_ty in the fmtty parameter does
+ not let you match on Int only, as you may in fact have Float
+ (Arg_padding, ...) ("%.*d") beginning with an Int_ty), it is also
+ a partial function, because the typing information in a format is
+ not quite enough to reconstruct it unambiguously. For example, the
+ format types of "%d%_r" and "%_r%d" have the same format6
+ parameters, but they are not at all exchangeable, and putting one
+ in place of the other must result in a dynamic failure.
+
+ Given that:
+ - we'd have to duplicate a lot of non-trivial typing logic from type_format
+ - this wouldn't even eliminate (all) the dynamic failures
+ we decided to just reuse type_format directly for now.
+*)
+let recast :
+ type a1 b1 c1 d1 e1 f1
+ a2 b2 c2 d2 e2 f2
+ .
+ (a1, b1, c1, d1, e1, f1) fmt
+ -> (a1, b1, c1, d1, e1, f1,
+ a2, b2, c2, d2, e2, f2) fmtty_rel
+ -> (a2, b2, c2, d2, e2, f2) fmt
+= fun fmt fmtty ->
+ type_format fmt (erase_rel (symm fmtty))
+
+(******************************************************************************)
+ (* Printing tools *)
+
+(* Add padding spaces arround a string. *)
+let fix_padding padty width str =
+ let len = String.length str in
+ if width <= len then str else
+ let res = Bytes.make width (if padty = Zeros then '0' else ' ') in
+ begin match padty with
+ | Left -> String.blit str 0 res 0 len
+ | Right -> String.blit str 0 res (width - len) len
+ | Zeros when len > 0 && (str.[0] = '+' || str.[0] = '-' || str.[0] = ' ') ->
+ Bytes.set res 0 str.[0];
+ String.blit str 1 res (width - len + 1) (len - 1)
+ | Zeros when len > 1 && str.[0] = '0' && (str.[1] = 'x' || str.[1] = 'X') ->
+ Bytes.set res 1 str.[1];
+ String.blit str 2 res (width - len + 2) (len - 2)
+ | Zeros ->
+ String.blit str 0 res (width - len) len
+ end;
+ Bytes.unsafe_to_string res
+
+(* Add '0' padding to int, int32, nativeint or int64 string representation. *)
+let fix_int_precision prec str =
+ let len = String.length str in
+ if prec <= len then str else
+ let res = Bytes.make prec '0' in
+ begin match str.[0] with
+ | ('+' | '-' | ' ') as c ->
+ Bytes.set res 0 c;
+ String.blit str 1 res (prec - len + 1) (len - 1);
+ | '0' when len > 1 && (str.[1] = 'x' || str.[1] = 'X') ->
+ Bytes.set res 1 str.[1];
+ String.blit str 2 res (prec - len + 2) (len - 2);
+ | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' ->
+ String.blit str 0 res (prec - len) len;
+ | _ ->
+ assert false
+ end;
+ Bytes.unsafe_to_string res
+
+(* Escape a string according to the OCaml lexing convention. *)
+let string_to_caml_string str =
+ let str = String.escaped str in
+ let l = String.length str in
+ let res = Bytes.make (l + 2) '\"' in
+ String.unsafe_blit str 0 res 1 l;
+ Bytes.unsafe_to_string res
+
+(* Generate the format_int/int32/nativeint/int64 first argument
+ from an int_conv. *)
+let format_of_iconv = function
+ | Int_d -> "%d" | Int_pd -> "%+d" | Int_sd -> "% d"
+ | Int_i -> "%i" | Int_pi -> "%+i" | Int_si -> "% i"
+ | Int_x -> "%x" | Int_Cx -> "%#x"
+ | Int_X -> "%X" | Int_CX -> "%#X"
+ | Int_o -> "%o" | Int_Co -> "%#o"
+ | Int_u -> "%u"
+
+let format_of_iconvL = function
+ | Int_d -> "%Ld" | Int_pd -> "%+Ld" | Int_sd -> "% Ld"
+ | Int_i -> "%Li" | Int_pi -> "%+Li" | Int_si -> "% Li"
+ | Int_x -> "%Lx" | Int_Cx -> "%#Lx"
+ | Int_X -> "%LX" | Int_CX -> "%#LX"
+ | Int_o -> "%Lo" | Int_Co -> "%#Lo"
+ | Int_u -> "%Lu"
+
+let format_of_iconvl = function
+ | Int_d -> "%ld" | Int_pd -> "%+ld" | Int_sd -> "% ld"
+ | Int_i -> "%li" | Int_pi -> "%+li" | Int_si -> "% li"
+ | Int_x -> "%lx" | Int_Cx -> "%#lx"
+ | Int_X -> "%lX" | Int_CX -> "%#lX"
+ | Int_o -> "%lo" | Int_Co -> "%#lo"
+ | Int_u -> "%lu"
+
+let format_of_iconvn = function
+ | Int_d -> "%nd" | Int_pd -> "%+nd" | Int_sd -> "% nd"
+ | Int_i -> "%ni" | Int_pi -> "%+ni" | Int_si -> "% ni"
+ | Int_x -> "%nx" | Int_Cx -> "%#nx"
+ | Int_X -> "%nX" | Int_CX -> "%#nX"
+ | Int_o -> "%no" | Int_Co -> "%#no"
+ | Int_u -> "%nu"
+
+(* Generate the format_float first argument form a float_conv. *)
+let format_of_fconv fconv prec =
+ let symb = if fconv = Float_F then 'g' else char_of_fconv fconv in
+ let buf = buffer_create 16 in
+ buffer_add_char buf '%';
+ bprint_fconv_flag buf fconv;
+ buffer_add_char buf '.';
+ buffer_add_string buf (string_of_int prec);
+ buffer_add_char buf symb;
+ buffer_contents buf
+
+(* Convert an integer to a string according to a conversion. *)
+let convert_int iconv n = format_int (format_of_iconv iconv) n
+let convert_int32 iconv n = format_int32 (format_of_iconvl iconv) n
+let convert_nativeint iconv n = format_nativeint (format_of_iconvn iconv) n
+let convert_int64 iconv n = format_int64 (format_of_iconvL iconv) n
+
+(* Convert a float to string. *)
+(* Fix special case of "OCaml float format". *)
+let convert_float fconv prec x =
+ let str = format_float (format_of_fconv fconv prec) x in
+ if fconv <> Float_F then str else
+ let len = String.length str in
+ let rec is_valid i =
+ if i = len then false else
+ match str.[i] with
+ | '.' | 'e' | 'E' -> true
+ | _ -> is_valid (i + 1)
+ in
+ match classify_float x with
+ | FP_normal | FP_subnormal | FP_zero ->
+ if is_valid 0 then str else str ^ "."
+ | FP_infinite ->
+ if x < 0.0 then "neg_infinity" else "infinity"
+ | FP_nan -> "nan"
+
+(* Convert a char to a string according to the OCaml lexical convention. *)
+let format_caml_char c =
+ let str = Char.escaped c in
+ let l = String.length str in
+ let res = Bytes.make (l + 2) '\'' in
+ String.unsafe_blit str 0 res 1 l;
+ Bytes.unsafe_to_string res
+
+(* Convert a format type to string *)
+let string_of_fmtty fmtty =
+ let buf = buffer_create 16 in
+ bprint_fmtty buf fmtty;
+ buffer_contents buf
+
+(******************************************************************************)
+ (* Generic printing function *)
+
+(* Make a generic printing function. *)
+(* Used to generate Printf and Format printing functions. *)
+(* Parameters:
+ k: a continuation finally applied to the output stream and the accumulator.
+ o: the output stream (see k, %a and %t).
+ acc: rev list of printing entities (string, char, flush, formatting, ...).
+ fmt: the format. *)
+let rec make_printf : type a b c d e f .
+ (b -> (b, c) acc -> f) -> b -> (b, c) acc ->
+ (a, b, c, d, e, f) fmt -> a =
+fun k o acc fmt -> match fmt with
+ | Char rest ->
+ fun c ->
+ let new_acc = Acc_data_char (acc, c) in
+ make_printf k o new_acc rest
+ | Caml_char rest ->
+ fun c ->
+ let new_acc = Acc_data_string (acc, format_caml_char c) in
+ make_printf k o new_acc rest
+ | String (pad, rest) ->
+ make_string_padding k o acc rest pad (fun str -> str)
+ | Caml_string (pad, rest) ->
+ make_string_padding k o acc rest pad string_to_caml_string
+ | Int (iconv, pad, prec, rest) ->
+ make_int_padding_precision k o acc rest pad prec convert_int iconv
+ | Int32 (iconv, pad, prec, rest) ->
+ make_int_padding_precision k o acc rest pad prec convert_int32 iconv
+ | Nativeint (iconv, pad, prec, rest) ->
+ make_int_padding_precision k o acc rest pad prec convert_nativeint iconv
+ | Int64 (iconv, pad, prec, rest) ->
+ make_int_padding_precision k o acc rest pad prec convert_int64 iconv
+ | Float (fconv, pad, prec, rest) ->
+ make_float_padding_precision k o acc rest pad prec fconv
+ | Bool rest ->
+ fun b -> make_printf k o (Acc_data_string (acc, string_of_bool b)) rest
+ | Alpha rest ->
+ fun f x -> make_printf k o (Acc_delay (acc, fun o -> f o x)) rest
+ | Theta rest ->
+ fun f -> make_printf k o (Acc_delay (acc, f)) rest
+ | Reader _ ->
+ (* This case is impossible, by typing of formats. *)
+ (* Indeed, since printf and co. take a format4 as argument, the 'd and 'e
+ type parameters of fmt are obviously equals. The Reader is the
+ only constructor which touch 'd and 'e type parameters of the format
+ type, it adds an (->) to the 'd parameters. Consequently, a format4
+ cannot contain a Reader node, except in the sub-format associated to
+ an %{...%}. It's not a problem because make_printf do not call
+ itself recursively on the sub-format associated to %{...%}. *)
+ assert false
+ | Flush rest ->
+ make_printf k o (Acc_flush acc) rest
+
+ | String_literal (str, rest) ->
+ make_printf k o (Acc_string_literal (acc, str)) rest
+ | Char_literal (chr, rest) ->
+ make_printf k o (Acc_char_literal (acc, chr)) rest
+
+ | Format_arg (_, sub_fmtty, rest) ->
+ let ty = string_of_fmtty sub_fmtty in
+ (fun str ->
+ ignore str;
+ make_printf k o (Acc_data_string (acc, ty)) rest)
+ | Format_subst (_, fmtty, rest) ->
+ fun (Format (fmt, _)) -> make_printf k o acc
+ (concat_fmt (recast fmt fmtty) rest)
+
+ | Scan_char_set (_, _, rest) ->
+ let new_acc = Acc_invalid_arg (acc, "Printf: bad conversion %[") in
+ fun _ -> make_printf k o new_acc rest
+ | Scan_get_counter (_, rest) ->
+ (* This case should be refused for Printf. *)
+ (* Accepted for backward compatibility. *)
+ (* Interpret %l, %n and %L as %u. *)
+ fun n ->
+ let new_acc = Acc_data_string (acc, format_int "%u" n) in
+ make_printf k o new_acc rest
+ | Ignored_param (ign, rest) ->
+ make_ignored_param k o acc ign rest
+
+ | Formatting_lit (fmting_lit, rest) ->
+ make_printf k o (Acc_formatting_lit (acc, fmting_lit)) rest
+ | Formatting_gen (Open_tag (Format (fmt', _)), rest) ->
+ let k' koc kacc =
+ make_printf k koc (Acc_formatting_gen (acc, Acc_open_tag kacc)) rest in
+ make_printf k' o End_of_acc fmt'
+ | Formatting_gen (Open_box (Format (fmt', _)), rest) ->
+ let k' koc kacc =
+ make_printf k koc (Acc_formatting_gen (acc, Acc_open_box kacc)) rest in
+ make_printf k' o End_of_acc fmt'
+
+ | End_of_format ->
+ k o acc
+
+(* Delay the error (Invalid_argument "Printf: bad conversion %_"). *)
+(* Generate functions to take remaining arguments (after the "%_"). *)
+and make_ignored_param : type x y a b c d e f .
+ (b -> (b, c) acc -> f) -> b -> (b, c) acc ->
+ (a, b, c, d, y, x) ignored ->
+ (x, b, c, y, e, f) fmt -> a =
+fun k o acc ign fmt -> match ign with
+ | Ignored_char -> make_invalid_arg k o acc fmt
+ | Ignored_caml_char -> make_invalid_arg k o acc fmt
+ | Ignored_string _ -> make_invalid_arg k o acc fmt
+ | Ignored_caml_string _ -> make_invalid_arg k o acc fmt
+ | Ignored_int (_, _) -> make_invalid_arg k o acc fmt
+ | Ignored_int32 (_, _) -> make_invalid_arg k o acc fmt
+ | Ignored_nativeint (_, _) -> make_invalid_arg k o acc fmt
+ | Ignored_int64 (_, _) -> make_invalid_arg k o acc fmt
+ | Ignored_float (_, _) -> make_invalid_arg k o acc fmt
+ | Ignored_bool -> make_invalid_arg k o acc fmt
+ | Ignored_format_arg _ -> make_invalid_arg k o acc fmt
+ | Ignored_format_subst (_, fmtty) -> make_from_fmtty k o acc fmtty fmt
+ | Ignored_reader -> assert false
+ | Ignored_scan_char_set _ -> make_invalid_arg k o acc fmt
+ | Ignored_scan_get_counter _ -> make_invalid_arg k o acc fmt
+
+
+(* Special case of printf "%_(". *)
+and make_from_fmtty : type x y a b c d e f .
+ (b -> (b, c) acc -> f) -> b -> (b, c) acc ->
+ (a, b, c, d, y, x) fmtty ->
+ (x, b, c, y, e, f) fmt -> a =
+fun k o acc fmtty fmt -> match fmtty with
+ | Char_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | String_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Int_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Int32_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Nativeint_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Int64_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Float_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Bool_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Alpha_ty rest -> fun _ _ -> make_from_fmtty k o acc rest fmt
+ | Theta_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt
+ | Reader_ty _ -> assert false
+ | Ignored_reader_ty _ -> assert false
+ | Format_arg_ty (_, rest) -> fun _ -> make_from_fmtty k o acc rest fmt
+ | End_of_fmtty -> make_invalid_arg k o acc fmt
+ | Format_subst_ty (ty1, ty2, rest) ->
+ let ty = trans (symm ty1) ty2 in
+ fun _ -> make_from_fmtty k o acc (concat_fmtty ty rest) fmt
+
+(* Insert an Acc_invalid_arg in the accumulator and continue to generate
+ closures to get the remaining arguments. *)
+and make_invalid_arg : type a b c d e f .
+ (b -> (b, c) acc -> f) -> b -> (b, c) acc ->
+ (a, b, c, d, e, f) fmt -> a =
+fun k o acc fmt ->
+ make_printf k o (Acc_invalid_arg (acc, "Printf: bad conversion %_")) fmt
+
+(* Fix padding, take it as an extra integer argument if needed. *)
+and make_string_padding : type x z a b c d e f .
+ (b -> (b, c) acc -> f) -> b -> (b, c) acc ->
+ (a, b, c, d, e, f) fmt ->
+ (x, z -> a) padding -> (z -> string) -> x =
+ fun k o acc fmt pad trans -> match pad with
+ | No_padding ->
+ fun x ->
+ let new_acc = Acc_data_string (acc, trans x) in
+ make_printf k o new_acc fmt
+ | Lit_padding (padty, width) ->
+ fun x ->
+ let new_acc = Acc_data_string (acc, fix_padding padty width (trans x)) in
+ make_printf k o new_acc fmt
+ | Arg_padding padty ->
+ fun w x ->
+ let new_acc = Acc_data_string (acc, fix_padding padty w (trans x)) in
+ make_printf k o new_acc fmt
+
+(* Fix padding and precision for int, int32, nativeint or int64. *)
+(* Take one or two extra integer arguments if needed. *)
+and make_int_padding_precision : type x y z a b c d e f .
+ (b -> (b, c) acc -> f) -> b -> (b, c) acc ->
+ (a, b, c, d, e, f) fmt ->
+ (x, y) padding -> (y, z -> a) precision -> (int_conv -> z -> string) ->
+ int_conv -> x =
+ fun k o acc fmt pad prec trans iconv -> match pad, prec with
+ | No_padding, No_precision ->
+ fun x ->
+ let str = trans iconv x in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | No_padding, Lit_precision p ->
+ fun x ->
+ let str = fix_int_precision p (trans iconv x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | No_padding, Arg_precision ->
+ fun p x ->
+ let str = fix_int_precision p (trans iconv x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Lit_padding (padty, w), No_precision ->
+ fun x ->
+ let str = fix_padding padty w (trans iconv x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Lit_padding (padty, w), Lit_precision p ->
+ fun x ->
+ let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Lit_padding (padty, w), Arg_precision ->
+ fun p x ->
+ let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Arg_padding padty, No_precision ->
+ fun w x ->
+ let str = fix_padding padty w (trans iconv x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Arg_padding padty, Lit_precision p ->
+ fun w x ->
+ let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Arg_padding padty, Arg_precision ->
+ fun w p x ->
+ let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+
+(* Convert a float, fix padding and precision if needed. *)
+(* Take the float argument and one or two extra integer arguments if needed. *)
+and make_float_padding_precision : type x y a b c d e f .
+ (b -> (b, c) acc -> f) -> b -> (b, c) acc ->
+ (a, b, c, d, e, f) fmt ->
+ (x, y) padding -> (y, float -> a) precision -> float_conv -> x =
+ fun k o acc fmt pad prec fconv -> match pad, prec with
+ | No_padding, No_precision ->
+ fun x ->
+ let str = convert_float fconv default_float_precision x in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | No_padding, Lit_precision p ->
+ fun x ->
+ let str = convert_float fconv p x in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | No_padding, Arg_precision ->
+ fun p x ->
+ let str = convert_float fconv p x in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Lit_padding (padty, w), No_precision ->
+ fun x ->
+ let str = convert_float fconv default_float_precision x in
+ let str' = fix_padding padty w str in
+ make_printf k o (Acc_data_string (acc, str')) fmt
+ | Lit_padding (padty, w), Lit_precision p ->
+ fun x ->
+ let str = fix_padding padty w (convert_float fconv p x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Lit_padding (padty, w), Arg_precision ->
+ fun p x ->
+ let str = fix_padding padty w (convert_float fconv p x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Arg_padding padty, No_precision ->
+ fun w x ->
+ let str = convert_float fconv default_float_precision x in
+ let str' = fix_padding padty w str in
+ make_printf k o (Acc_data_string (acc, str')) fmt
+ | Arg_padding padty, Lit_precision p ->
+ fun w x ->
+ let str = fix_padding padty w (convert_float fconv p x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+ | Arg_padding padty, Arg_precision ->
+ fun w p x ->
+ let str = fix_padding padty w (convert_float fconv p x) in
+ make_printf k o (Acc_data_string (acc, str)) fmt
+
+(******************************************************************************)
+ (* Continuations for make_printf *)
+
+(* Recursively output an "accumulator" containing a reversed list of
+ printing entities (string, char, flus, ...) in an output_stream. *)
+(* Used as a continuation of make_printf. *)
+let rec output_acc o acc = match acc with
+ | Acc_formatting_lit (p, fmting_lit) ->
+ let s = string_of_formatting_lit fmting_lit in
+ output_acc o p; output_string o s;
+ | Acc_formatting_gen (p, Acc_open_tag acc') ->
+ output_acc o p; output_string o "@{"; output_acc o acc';
+ | Acc_formatting_gen (p, Acc_open_box acc') ->
+ output_acc o p; output_string o "@["; output_acc o acc';
+ | Acc_string_literal (p, s)
+ | Acc_data_string (p, s) -> output_acc o p; output_string o s
+ | Acc_char_literal (p, c)
+ | Acc_data_char (p, c) -> output_acc o p; output_char o c
+ | Acc_delay (p, f) -> output_acc o p; f o
+ | Acc_flush p -> output_acc o p; flush o
+ | Acc_invalid_arg (p, msg) -> output_acc o p; invalid_arg msg;
+ | End_of_acc -> ()
+
+(* Recursively output an "accumulator" containing a reversed list of
+ printing entities (string, char, flus, ...) in a buffer. *)
+(* Used as a continuation of make_printf. *)
+let rec bufput_acc b acc = match acc with
+ | Acc_formatting_lit (p, fmting_lit) ->
+ let s = string_of_formatting_lit fmting_lit in
+ bufput_acc b p; Buffer.add_string b s;
+ | Acc_formatting_gen (p, Acc_open_tag acc') ->
+ bufput_acc b p; Buffer.add_string b "@{"; bufput_acc b acc';
+ | Acc_formatting_gen (p, Acc_open_box acc') ->
+ bufput_acc b p; Buffer.add_string b "@["; bufput_acc b acc';
+ | Acc_string_literal (p, s)
+ | Acc_data_string (p, s) -> bufput_acc b p; Buffer.add_string b s
+ | Acc_char_literal (p, c)
+ | Acc_data_char (p, c) -> bufput_acc b p; Buffer.add_char b c
+ | Acc_delay (p, f) -> bufput_acc b p; f b
+ | Acc_flush p -> bufput_acc b p;
+ | Acc_invalid_arg (p, msg) -> bufput_acc b p; invalid_arg msg;
+ | End_of_acc -> ()
+
+(* Recursively output an "accumulator" containing a reversed list of
+ printing entities (string, char, flus, ...) in a buffer. *)
+(* Differ from bufput_acc by the interpretation of %a and %t. *)
+(* Used as a continuation of make_printf. *)
+let rec strput_acc b acc = match acc with
+ | Acc_formatting_lit (p, fmting_lit) ->
+ let s = string_of_formatting_lit fmting_lit in
+ strput_acc b p; Buffer.add_string b s;
+ | Acc_formatting_gen (p, Acc_open_tag acc') ->
+ strput_acc b p; Buffer.add_string b "@{"; strput_acc b acc';
+ | Acc_formatting_gen (p, Acc_open_box acc') ->
+ strput_acc b p; Buffer.add_string b "@["; strput_acc b acc';
+ | Acc_string_literal (p, s)
+ | Acc_data_string (p, s) -> strput_acc b p; Buffer.add_string b s
+ | Acc_char_literal (p, c)
+ | Acc_data_char (p, c) -> strput_acc b p; Buffer.add_char b c
+ | Acc_delay (p, f) -> strput_acc b p; Buffer.add_string b (f ())
+ | Acc_flush p -> strput_acc b p;
+ | Acc_invalid_arg (p, msg) -> strput_acc b p; invalid_arg msg;
+ | End_of_acc -> ()
+
+(******************************************************************************)
+ (* Error managment *)
+
+(* Raise a Failure with a pretty-printed error message. *)
+let failwith_message (Format (fmt, _)) =
+ let buf = Buffer.create 256 in
+ let k () acc = strput_acc buf acc; failwith (Buffer.contents buf) in
+ make_printf k () End_of_acc fmt
+
+(******************************************************************************)
+ (* Formatting tools *)
+
+(* Convert a string to an open block description (indent, block_type) *)
+let open_box_of_string str =
+ if str = "" then (0, Pp_box) else
+ let len = String.length str in
+ let invalid_box () = failwith_message "invalid box description %S" str in
+ let rec parse_spaces i =
+ if i = len then i else
+ match str.[i] with
+ | ' ' | '\t' -> parse_spaces (i + 1)
+ | _ -> i
+ and parse_lword i j =
+ if j = len then j else
+ match str.[j] with
+ | 'a' .. 'z' -> parse_lword i (j + 1)
+ | _ -> j
+ and parse_int i j =
+ if j = len then j else
+ match str.[j] with
+ | '0' .. '9' | '-' -> parse_int i (j + 1)
+ | _ -> j in
+ let wstart = parse_spaces 0 in
+ let wend = parse_lword wstart wstart in
+ let box_name = String.sub str wstart (wend - wstart) in
+ let nstart = parse_spaces wend in
+ let nend = parse_int nstart nstart in
+ let indent =
+ if nstart = nend then 0 else
+ try int_of_string (String.sub str nstart (nend - nstart))
+ with Failure _ -> invalid_box () in
+ let exp_end = parse_spaces nend in
+ let () = if exp_end <> len then invalid_box () in
+ let box_type = match box_name with
+ | "" | "b" -> Pp_box
+ | "h" -> Pp_hbox
+ | "v" -> Pp_vbox
+ | "hv" -> Pp_hvbox
+ | "hov" -> Pp_hovbox
+ | _ -> invalid_box () in
+ (indent, box_type)
+
+(******************************************************************************)
+ (* Parsing tools *)
+
+(* Create a padding_fmt_ebb from a padding and a format. *)
+(* Copy the padding to disjoin the type parameters of argument and result. *)
+let make_padding_fmt_ebb : type x y .
+ (x, y) padding -> (_, _, _, _, _, _) fmt ->
+ (_, _, _, _, _) padding_fmt_ebb =
+fun pad fmt -> match pad with
+ | No_padding -> Padding_fmt_EBB (No_padding, fmt)
+ | Lit_padding (s, w) -> Padding_fmt_EBB (Lit_padding (s, w), fmt)
+ | Arg_padding s -> Padding_fmt_EBB (Arg_padding s, fmt)
+
+(* Create a precision_fmt_ebb from a precision and a format. *)
+(* Copy the precision to disjoin the type parameters of argument and result. *)
+let make_precision_fmt_ebb : type x y .
+ (x, y) precision -> (_, _, _, _, _, _) fmt ->
+ (_, _, _, _, _) precision_fmt_ebb =
+fun prec fmt -> match prec with
+ | No_precision -> Precision_fmt_EBB (No_precision, fmt)
+ | Lit_precision p -> Precision_fmt_EBB (Lit_precision p, fmt)
+ | Arg_precision -> Precision_fmt_EBB (Arg_precision, fmt)
+
+(* Create a padprec_fmt_ebb forma a padding, a precision and a format. *)
+(* Copy the padding and the precision to disjoin type parameters of arguments
+ and result. *)
+let make_padprec_fmt_ebb : type x y z t .
+ (x, y) padding -> (z, t) precision ->
+ (_, _, _, _, _, _) fmt ->
+ (_, _, _, _, _) padprec_fmt_ebb =
+fun pad prec fmt ->
+ let Precision_fmt_EBB (prec, fmt') = make_precision_fmt_ebb prec fmt in
+ match pad with
+ | No_padding -> Padprec_fmt_EBB (No_padding, prec, fmt')
+ | Lit_padding (s, w) -> Padprec_fmt_EBB (Lit_padding (s, w), prec, fmt')
+ | Arg_padding s -> Padprec_fmt_EBB (Arg_padding s, prec, fmt')
+
+(******************************************************************************)
+ (* Format parsing *)
+
+(* Parse a string representing a format and create a fmt_ebb. *)
+(* Raise an Failure exception in case of invalid format. *)
+let fmt_ebb_of_string ?legacy_behavior str =
+ (* Parameters naming convention: *)
+ (* - lit_start: start of the literal sequence. *)
+ (* - str_ind: current index in the string. *)
+ (* - end_ind: end of the current (sub-)format. *)
+ (* - pct_ind: index of the '%' in the current micro-format. *)
+ (* - zero: is the '0' flag defined in the current micro-format. *)
+ (* - minus: is the '-' flag defined in the current micro-format. *)
+ (* - plus: is the '+' flag defined in the current micro-format. *)
+ (* - sharp: is the '#' flag defined in the current micro-format. *)
+ (* - space: is the ' ' flag defined in the current micro-format. *)
+ (* - ign: is the '_' flag defined in the current micro-format. *)
+ (* - pad: padding of the current micro-format. *)
+ (* - prec: precision of the current micro-format. *)
+ (* - symb: char representing the conversion ('c', 's', 'd', ...). *)
+ (* - char_set: set of characters as bitmap (see scanf %[...]). *)
+
+ let legacy_behavior = match legacy_behavior with
+ | Some flag -> flag
+ | None -> true
+ (** When this flag is enabled, the format parser tries to behave as
+ the <4.02 implementations, in particular it ignores most benine
+ nonsensical format. When the flag is disabled, it will reject any
+ format that is not accepted by the specification.
+
+ A typical example would be "%+ d": specifying both '+' (if the
+ number is positive, pad with a '+' to get the same width as
+ negative numbres) and ' ' (if the number is positive, pad with
+ a space) does not make sense, but the legacy (< 4.02)
+ implementation was happy to just ignore the space.
+ *)
+ in
+
+ (* Raise a Failure with a friendly error message. *)
+ (* Used when the end of the format (or the current sub-format) was encoutered
+ unexpectedly. *)
+ let unexpected_end_of_format end_ind =
+ failwith_message
+ "invalid format %S: at character number %d, unexpected end of format"
+ str end_ind;
+
+ (* Raise Failure with a friendly error message about an option dependencie
+ problem. *)
+ and invalid_format_without str_ind c s =
+ failwith_message
+ "invalid format %S: at character number %d, '%c' without %s"
+ str str_ind c s
+
+ (* Raise Failure with a friendly error message about an unexpected
+ character. *)
+ and expected_character str_ind expected read =
+ failwith_message
+ "invalid format %S: at character number %d, %s expected, read %C"
+ str str_ind expected read in
+
+ (* Parse the string from beg_ind (included) to end_ind (excluded). *)
+ let rec parse : type e f . int -> int -> (_, _, e, f) fmt_ebb =
+ fun beg_ind end_ind -> parse_literal beg_ind beg_ind end_ind
+
+ (* Read literal characters up to '%' or '@' special characters. *)
+ and parse_literal : type e f . int -> int -> int -> (_, _, e, f) fmt_ebb =
+ fun lit_start str_ind end_ind ->
+ if str_ind = end_ind then add_literal lit_start str_ind End_of_format else
+ match str.[str_ind] with
+ | '%' ->
+ let Fmt_EBB fmt_rest = parse_format str_ind end_ind in
+ add_literal lit_start str_ind fmt_rest
+ | '@' ->
+ let Fmt_EBB fmt_rest = parse_after_at (str_ind + 1) end_ind in
+ add_literal lit_start str_ind fmt_rest
+ | _ ->
+ parse_literal lit_start (str_ind + 1) end_ind
+
+ (* Parse a format after '%' *)
+ and parse_format : type e f . int -> int -> (_, _, e, f) fmt_ebb =
+ fun pct_ind end_ind -> parse_ign pct_ind (pct_ind + 1) end_ind
+
+ and parse_ign : type e f . int -> int -> int -> (_, _, e, f) fmt_ebb =
+ fun pct_ind str_ind end_ind ->
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | '_' -> parse_flags pct_ind (str_ind+1) end_ind true
+ | _ -> parse_flags pct_ind str_ind end_ind false
+
+ and parse_flags : type e f . int -> int -> int -> bool -> (_, _, e, f) fmt_ebb =
+ fun pct_ind str_ind end_ind ign ->
+ let zero = ref false and minus = ref false
+ and plus = ref false and space = ref false
+ and sharp = ref false in
+ let set_flag str_ind flag =
+ (* in legacy mode, duplicate flags are accepted *)
+ if !flag && not legacy_behavior then
+ failwith_message
+ "invalid format %S: at character number %d, duplicate flag %C"
+ str str_ind str.[str_ind];
+ flag := true;
+ in
+ let rec read_flags str_ind =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ begin match str.[str_ind] with
+ | '0' -> set_flag str_ind zero; read_flags (str_ind + 1)
+ | '-' -> set_flag str_ind minus; read_flags (str_ind + 1)
+ | '+' -> set_flag str_ind plus; read_flags (str_ind + 1)
+ | '#' -> set_flag str_ind sharp; read_flags (str_ind + 1)
+ | ' ' -> set_flag str_ind space; read_flags (str_ind + 1)
+ | _ ->
+ parse_padding pct_ind str_ind end_ind
+ !zero !minus !plus !sharp !space ign
+ end
+ in
+ read_flags str_ind
+
+ (* Try to read a digital or a '*' padding. *)
+ and parse_padding : type e f .
+ int -> int -> int -> bool -> bool -> bool -> bool -> bool -> bool ->
+ (_, _, e, f) fmt_ebb =
+ fun pct_ind str_ind end_ind zero minus plus sharp space ign ->
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ let padty = match zero, minus with
+ | false, false -> Right
+ | false, true -> Left
+ | true, false -> Zeros
+ | true, true ->
+ if legacy_behavior then Left
+ else incompatible_flag pct_ind str_ind '-' "0" in
+ match str.[str_ind] with
+ | '0' .. '9' ->
+ let new_ind, width = parse_positive str_ind end_ind 0 in
+ parse_after_padding pct_ind new_ind end_ind plus sharp space ign
+ (Lit_padding (padty, width))
+ | '*' ->
+ parse_after_padding pct_ind (str_ind + 1) end_ind plus sharp space ign
+ (Arg_padding padty)
+ | _ ->
+ if legacy_behavior then
+ parse_after_padding pct_ind str_ind end_ind plus sharp space ign
+ No_padding
+ else begin match padty with
+ | Left ->
+ invalid_format_without (str_ind - 1) '-' "padding"
+ | Zeros ->
+ invalid_format_without (str_ind - 1) '0' "padding"
+ | Right ->
+ parse_after_padding pct_ind str_ind end_ind plus sharp space ign
+ No_padding
+ end
+
+ (* Is precision defined? *)
+ and parse_after_padding : type x e f .
+ int -> int -> int -> bool -> bool -> bool -> bool -> (x, _) padding ->
+ (_, _, e, f) fmt_ebb =
+ fun pct_ind str_ind end_ind plus sharp space ign pad ->
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | '.' ->
+ parse_precision pct_ind (str_ind + 1) end_ind plus sharp space ign pad
+ | symb ->
+ parse_conversion pct_ind (str_ind + 1) end_ind plus sharp space ign pad
+ No_precision symb
+
+ (* Read the digital or '*' precision. *)
+ and parse_precision : type x e f .
+ int -> int -> int -> bool -> bool -> bool -> bool -> (x, _) padding ->
+ (_, _, e, f) fmt_ebb =
+ fun pct_ind str_ind end_ind plus sharp space ign pad ->
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ let parse_literal str_ind =
+ let new_ind, prec = parse_positive str_ind end_ind 0 in
+ if new_ind = end_ind then unexpected_end_of_format end_ind;
+ parse_conversion pct_ind (new_ind + 1) end_ind plus sharp space ign pad
+ (Lit_precision prec) str.[new_ind] in
+ match str.[str_ind] with
+ | '0' .. '9' -> parse_literal str_ind
+ | ('+' | '-') when legacy_behavior ->
+ (* Legacy mode would accept and ignore '+' or '-' before the
+ integer describing the desired precision; not that this
+ cannot happen for padding width, as '+' and '-' already have
+ a semantics there.
+
+ That said, the idea (supported by this tweak) that width and
+ precision literals are "integer literals" in the OCaml sense is
+ still blatantly wrong, as 123_456 or 0xFF are rejected. *)
+ parse_literal (str_ind + 1)
+ | '*' ->
+ parse_after_precision pct_ind (str_ind + 1) end_ind plus sharp space ign
+ pad Arg_precision
+ | _ ->
+ if legacy_behavior then
+ (* note that legacy implementation did not ignore '.' without
+ a number (as it does for padding indications), but
+ interprets it as '.0' *)
+ parse_after_precision pct_ind str_ind end_ind plus sharp space ign pad (Lit_precision 0) else
+ invalid_format_without (str_ind - 1) '.' "precision"
+
+ (* Try to read the conversion. *)
+ and parse_after_precision : type x z e f .
+ int -> int -> int -> bool -> bool -> bool -> bool -> (x, _) padding ->
+ (z, _) precision -> (_, _, e, f) fmt_ebb =
+ fun pct_ind str_ind end_ind plus sharp space ign pad prec ->
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ parse_conversion pct_ind (str_ind + 1) end_ind plus sharp space ign pad prec
+ str.[str_ind]
+
+ (* Case analysis on conversion. *)
+ and parse_conversion : type x y z t e f .
+ int -> int -> int -> bool -> bool -> bool -> bool -> (x, y) padding ->
+ (z, t) precision -> char -> (_, _, e, f) fmt_ebb =
+ fun pct_ind str_ind end_ind plus sharp space ign pad prec symb ->
+ (* Flags used to check option usages/compatibilities. *)
+ let plus_used = ref false and sharp_used = ref false
+ and space_used = ref false and ign_used = ref false
+ and pad_used = ref false and prec_used = ref false in
+
+ (* Access to options, update flags. *)
+ let get_plus () = plus_used := true; plus
+ and get_sharp () = sharp_used := true; sharp
+ and get_space () = space_used := true; space
+ and get_ign () = ign_used := true; ign
+ and get_pad () = pad_used := true; pad
+ and get_prec () = prec_used := true; prec in
+
+ (* Check that padty <> Zeros. *)
+ let check_no_0 symb (type a) (type b) (pad : (a,b) padding) =
+ match pad with
+ | No_padding -> pad
+ | Lit_padding ((Left | Right), _) -> pad
+ | Arg_padding (Left | Right) -> pad
+ | Lit_padding (Zeros, width) ->
+ if legacy_behavior then Lit_padding (Right, width)
+ else incompatible_flag pct_ind str_ind symb "0"
+ | Arg_padding Zeros ->
+ if legacy_behavior then Arg_padding Right
+ else incompatible_flag pct_ind str_ind symb "0"
+ in
+
+ (* Get padding as a pad_option (see "%_", "%{", "%(" and "%[").
+ (no need for legacy mode tweaking, those were rejected by the
+ legacy parser as well) *)
+ let get_pad_opt c = match get_pad () with
+ | No_padding -> None
+ | Lit_padding (Right, width) -> Some width
+ | Lit_padding (Zeros, width) ->
+ if legacy_behavior then Some width
+ else incompatible_flag pct_ind str_ind c "'0'"
+ | Lit_padding (Left, width) ->
+ if legacy_behavior then Some width
+ else incompatible_flag pct_ind str_ind c "'-'"
+ | Arg_padding _ -> incompatible_flag pct_ind str_ind c "'*'"
+ in
+
+ (* Get precision as a prec_option (see "%_f").
+ (no need for legacy mode tweaking, those were rejected by the
+ legacy parser as well) *)
+ let get_prec_opt () = match get_prec () with
+ | No_precision -> None
+ | Lit_precision ndec -> Some ndec
+ | Arg_precision -> incompatible_flag pct_ind str_ind '_' "'*'"
+ in
+
+ let fmt_result = match symb with
+ | ',' ->
+ parse str_ind end_ind
+ | 'c' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then Fmt_EBB (Ignored_param (Ignored_char, fmt_rest))
+ else Fmt_EBB (Char fmt_rest)
+ | 'C' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then Fmt_EBB (Ignored_param (Ignored_caml_char,fmt_rest))
+ else Fmt_EBB (Caml_char fmt_rest)
+ | 's' ->
+ let pad = check_no_0 symb (get_pad ()) in
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then
+ let ignored = Ignored_string (get_pad_opt '_') in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ let Padding_fmt_EBB (pad', fmt_rest') =
+ make_padding_fmt_ebb pad fmt_rest in
+ Fmt_EBB (String (pad', fmt_rest'))
+ | 'S' ->
+ let pad = check_no_0 symb (get_pad ()) in
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then
+ let ignored = Ignored_caml_string (get_pad_opt '_') in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ let Padding_fmt_EBB (pad', fmt_rest') =
+ make_padding_fmt_ebb pad fmt_rest in
+ Fmt_EBB (Caml_string (pad', fmt_rest'))
+ | 'd' | 'i' | 'x' | 'X' | 'o' | 'u' ->
+ let iconv = compute_int_conv pct_ind str_ind (get_plus ()) (get_sharp ())
+ (get_space ()) symb in
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then
+ let ignored = Ignored_int (iconv, get_pad_opt '_') in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ let Padprec_fmt_EBB (pad', prec', fmt_rest') =
+ make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in
+ Fmt_EBB (Int (iconv, pad', prec', fmt_rest'))
+ | 'N' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ let counter = Token_counter in
+ if get_ign () then
+ let ignored = Ignored_scan_get_counter counter in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ Fmt_EBB (Scan_get_counter (counter, fmt_rest))
+ | 'l' | 'n' | 'L' when str_ind=end_ind || not (is_int_base str.[str_ind]) ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ let counter = counter_of_char symb in
+ if get_ign () then
+ let ignored = Ignored_scan_get_counter counter in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ Fmt_EBB (Scan_get_counter (counter, fmt_rest))
+ | 'l' ->
+ let iconv =
+ compute_int_conv pct_ind (str_ind + 1) (get_plus ()) (get_sharp ())
+ (get_space ()) str.[str_ind] in
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ if get_ign () then
+ let ignored = Ignored_int32 (iconv, get_pad_opt '_') in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ let Padprec_fmt_EBB (pad', prec', fmt_rest') =
+ make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in
+ Fmt_EBB (Int32 (iconv, pad', prec', fmt_rest'))
+ | 'n' ->
+ let iconv =
+ compute_int_conv pct_ind (str_ind + 1) (get_plus ())
+ (get_sharp ()) (get_space ()) str.[str_ind] in
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ if get_ign () then
+ let ignored = Ignored_nativeint (iconv, get_pad_opt '_') in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ let Padprec_fmt_EBB (pad', prec', fmt_rest') =
+ make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in
+ Fmt_EBB (Nativeint (iconv, pad', prec', fmt_rest'))
+ | 'L' ->
+ let iconv =
+ compute_int_conv pct_ind (str_ind + 1) (get_plus ()) (get_sharp ())
+ (get_space ()) str.[str_ind] in
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ if get_ign () then
+ let ignored = Ignored_int64 (iconv, get_pad_opt '_') in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ let Padprec_fmt_EBB (pad', prec', fmt_rest') =
+ make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in
+ Fmt_EBB (Int64 (iconv, pad', prec', fmt_rest'))
+ | 'f' | 'e' | 'E' | 'g' | 'G' | 'F' ->
+ let fconv = compute_float_conv pct_ind str_ind (get_plus ())
+ (get_space ()) symb in
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then
+ let ignored = Ignored_float (get_pad_opt '_', get_prec_opt ()) in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ let Padprec_fmt_EBB (pad', prec', fmt_rest') =
+ make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in
+ Fmt_EBB (Float (fconv, pad', prec', fmt_rest'))
+ | 'b' | 'B' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then Fmt_EBB (Ignored_param (Ignored_bool, fmt_rest))
+ else Fmt_EBB (Bool fmt_rest)
+ | 'a' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ Fmt_EBB (Alpha fmt_rest)
+ | 't' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ Fmt_EBB (Theta fmt_rest)
+ | 'r' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ if get_ign () then Fmt_EBB (Ignored_param (Ignored_reader, fmt_rest))
+ else Fmt_EBB (Reader fmt_rest)
+ | '!' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ Fmt_EBB (Flush fmt_rest)
+ | ('%' | '@') as c ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ Fmt_EBB (Char_literal (c, fmt_rest))
+ | '{' ->
+ let sub_end = search_subformat_end str_ind end_ind '}' in
+ let Fmt_EBB sub_fmt = parse str_ind sub_end in
+ let Fmt_EBB fmt_rest = parse (sub_end + 2) end_ind in
+ let sub_fmtty = fmtty_of_fmt sub_fmt in
+ if get_ign () then
+ let ignored = Ignored_format_arg (get_pad_opt '_', sub_fmtty) in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ Fmt_EBB (Format_arg (get_pad_opt '{', sub_fmtty, fmt_rest))
+ | '(' ->
+ let sub_end = search_subformat_end str_ind end_ind ')' in
+ let Fmt_EBB fmt_rest = parse (sub_end + 2) end_ind in
+ let Fmt_EBB sub_fmt = parse str_ind sub_end in
+ let sub_fmtty = fmtty_of_fmt sub_fmt in
+ if get_ign () then
+ let ignored = Ignored_format_subst (get_pad_opt '_', sub_fmtty) in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ Fmt_EBB (Format_subst (get_pad_opt '(', sub_fmtty, fmt_rest))
+ | '[' ->
+ let next_ind, char_set = parse_char_set str_ind end_ind in
+ let Fmt_EBB fmt_rest = parse next_ind end_ind in
+ if get_ign () then
+ let ignored = Ignored_scan_char_set (get_pad_opt '_', char_set) in
+ Fmt_EBB (Ignored_param (ignored, fmt_rest))
+ else
+ Fmt_EBB (Scan_char_set (get_pad_opt '[', char_set, fmt_rest))
+ | '-' | '+' | '#' | ' ' | '_' ->
+ failwith_message
+ "invalid format %S: at character number %d, \
+ flag %C is only allowed after the '%%', before padding and precision"
+ str pct_ind symb
+ | _ ->
+ failwith_message
+ "invalid format %S: at character number %d, \
+ invalid conversion \"%%%c\"" str (str_ind - 1) symb
+ in
+ (* Check for unused options, and reject them as incompatible.
+
+ Such checks need to be disabled in legacy mode, as the legacy
+ parser silently ignored incompatible flags. *)
+ if not legacy_behavior then begin
+ if not !plus_used && plus then
+ incompatible_flag pct_ind str_ind symb "'+'";
+ if not !sharp_used && sharp then
+ incompatible_flag pct_ind str_ind symb "'#'";
+ if not !space_used && space then
+ incompatible_flag pct_ind str_ind symb "' '";
+ if not !pad_used && Padding_EBB pad <> Padding_EBB No_padding then
+ incompatible_flag pct_ind str_ind symb "`padding'";
+ if not !prec_used && Precision_EBB prec <> Precision_EBB No_precision then
+ incompatible_flag pct_ind str_ind (if ign then '_' else symb)
+ "`precision'";
+ if ign && plus then incompatible_flag pct_ind str_ind '_' "'+'";
+ end;
+ (* this last test must not be disabled in legacy mode,
+ as ignoring it would typically result in a different typing
+ than what the legacy parser used *)
+ if not !ign_used && ign then
+ begin match symb with
+ (* argument-less formats can safely be ignored in legacy mode *)
+ | ('@' | '%' | '!' | ',') when legacy_behavior -> ()
+ | _ ->
+ incompatible_flag pct_ind str_ind symb "'_'"
+ end;
+ fmt_result
+
+ (* Parse formatting informations (after '@'). *)
+ and parse_after_at : type e f . int -> int -> (_, _, e, f) fmt_ebb =
+ fun str_ind end_ind ->
+ if str_ind = end_ind then Fmt_EBB (Char_literal ('@', End_of_format))
+ else
+ match str.[str_ind] with
+ | '[' ->
+ parse_tag false (str_ind + 1) end_ind
+ | ']' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Close_box, fmt_rest))
+ | '{' ->
+ parse_tag true (str_ind + 1) end_ind
+ | '}' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Close_tag, fmt_rest))
+ | ',' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Break ("@,", 0, 0), fmt_rest))
+ | ' ' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Break ("@ ", 1, 0), fmt_rest))
+ | ';' ->
+ parse_good_break (str_ind + 1) end_ind
+ | '?' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (FFlush, fmt_rest))
+ | '\n' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Force_newline, fmt_rest))
+ | '.' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Flush_newline, fmt_rest))
+ | '<' ->
+ parse_magic_size (str_ind + 1) end_ind
+ | '@' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Escaped_at, fmt_rest))
+ | '%' when str_ind + 1 < end_ind && str.[str_ind + 1] = '%' ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 2) end_ind in
+ Fmt_EBB (Formatting_lit (Escaped_percent, fmt_rest))
+ | '%' ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ Fmt_EBB (Char_literal ('@', fmt_rest))
+ | c ->
+ let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in
+ Fmt_EBB (Formatting_lit (Scan_indic c, fmt_rest))
+
+ and check_open_box : type a b c d e f . (a, b, c, d, e, f) fmt -> unit =
+ fun fmt -> match fmt with
+ | String_literal (str, End_of_format) -> (
+ try ignore (open_box_of_string str) with Failure _ ->
+ ((* Emit warning: invalid open box *))
+ )
+ | _ -> ()
+
+ (* Try to read the optionnal <name> after "@{" or "@[". *)
+ and parse_tag : type e f . bool -> int -> int -> (_, _, e, f) fmt_ebb =
+ fun is_open_tag str_ind end_ind ->
+ try
+ if str_ind = end_ind then raise Not_found;
+ match str.[str_ind] with
+ | '<' ->
+ let ind = String.index_from str (str_ind + 1) '>' in
+ if ind >= end_ind then raise Not_found;
+ let sub_str = String.sub str str_ind (ind - str_ind + 1) in
+ let Fmt_EBB fmt_rest = parse (ind + 1) end_ind in
+ let Fmt_EBB sub_fmt = parse str_ind (ind + 1) in
+ let sub_format = Format (sub_fmt, sub_str) in
+ let formatting = if is_open_tag then Open_tag sub_format else (
+ check_open_box sub_fmt;
+ Open_box sub_format) in
+ Fmt_EBB (Formatting_gen (formatting, fmt_rest))
+ | _ ->
+ raise Not_found
+ with Not_found ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ let sub_format = Format (End_of_format, "") in
+ let formatting =
+ if is_open_tag then Open_tag sub_format else Open_box sub_format in
+ Fmt_EBB (Formatting_gen (formatting, fmt_rest))
+
+ (* Try to read the optionnal <width offset> after "@;". *)
+ and parse_good_break : type e f . int -> int -> (_, _, e, f) fmt_ebb =
+ fun str_ind end_ind ->
+ let next_ind, formatting_lit =
+ try
+ if str_ind = end_ind || str.[str_ind] <> '<' then raise Not_found;
+ let str_ind_1 = parse_spaces (str_ind + 1) end_ind in
+ match str.[str_ind_1] with
+ | '0' .. '9' | '-' -> (
+ let str_ind_2, width = parse_integer str_ind_1 end_ind in
+ let str_ind_3 = parse_spaces str_ind_2 end_ind in
+ match str.[str_ind_3] with
+ | '>' ->
+ let s = String.sub str (str_ind-2) (str_ind_3-str_ind+3) in
+ str_ind_3 + 1, Break (s, width, 0)
+ | '0' .. '9' | '-' ->
+ let str_ind_4, offset = parse_integer str_ind_3 end_ind in
+ let str_ind_5 = parse_spaces str_ind_4 end_ind in
+ if str.[str_ind_5] <> '>' then raise Not_found;
+ let s = String.sub str (str_ind-2) (str_ind_5-str_ind+3) in
+ str_ind_5 + 1, Break (s, width, offset)
+ | _ -> raise Not_found
+ )
+ | _ -> raise Not_found
+ with Not_found | Failure _ ->
+ str_ind, Break ("@;", 1, 0)
+ in
+ let Fmt_EBB fmt_rest = parse next_ind end_ind in
+ Fmt_EBB (Formatting_lit (formatting_lit, fmt_rest))
+
+ (* Parse the size in a <n>. *)
+ and parse_magic_size : type e f . int -> int -> (_, _, e, f) fmt_ebb =
+ fun str_ind end_ind ->
+ match
+ try
+ let str_ind_1 = parse_spaces str_ind end_ind in
+ match str.[str_ind_1] with
+ | '0' .. '9' | '-' ->
+ let str_ind_2, size = parse_integer str_ind_1 end_ind in
+ let str_ind_3 = parse_spaces str_ind_2 end_ind in
+ if str.[str_ind_3] <> '>' then raise Not_found;
+ let s = String.sub str (str_ind - 2) (str_ind_3 - str_ind + 3) in
+ Some (str_ind_3 + 1, Magic_size (s, size))
+ | _ -> None
+ with Not_found | Failure _ ->
+ None
+ with
+ | Some (next_ind, formatting_lit) ->
+ let Fmt_EBB fmt_rest = parse next_ind end_ind in
+ Fmt_EBB (Formatting_lit (formatting_lit, fmt_rest))
+ | None ->
+ let Fmt_EBB fmt_rest = parse str_ind end_ind in
+ Fmt_EBB (Formatting_lit (Scan_indic '<', fmt_rest))
+
+ (* Parse and construct a char set. *)
+ and parse_char_set str_ind end_ind =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+
+ let char_set = create_char_set () in
+ let add_char c =
+ add_in_char_set char_set c;
+ in
+ let add_range c c' =
+ for i = int_of_char c to int_of_char c' do
+ add_in_char_set char_set (char_of_int i);
+ done;
+ in
+
+ let fail_single_percent str_ind =
+ failwith_message
+ "invalid format %S: '%%' alone is not accepted in character sets, \
+ use %%%% instead at position %d." str str_ind;
+ in
+
+ (* Parse the first character of a char set. *)
+ let rec parse_char_set_start str_ind end_ind =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ let c = str.[str_ind] in
+ parse_char_set_after_char (str_ind + 1) end_ind c;
+
+ (* Parse the content of a char set until the first ']'. *)
+ and parse_char_set_content str_ind end_ind =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | ']' ->
+ str_ind + 1
+ | '-' ->
+ add_char '-';
+ parse_char_set_content (str_ind + 1) end_ind;
+ | c ->
+ parse_char_set_after_char (str_ind + 1) end_ind c;
+
+ (* Test for range in char set. *)
+ and parse_char_set_after_char str_ind end_ind c =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | ']' ->
+ add_char c;
+ str_ind + 1
+ | '-' ->
+ parse_char_set_after_minus (str_ind + 1) end_ind c
+ | ('%' | '@') as c' when c = '%' ->
+ add_char c';
+ parse_char_set_content (str_ind + 1) end_ind
+ | c' ->
+ if c = '%' then fail_single_percent str_ind;
+ (* note that '@' alone is accepted, as done by the legacy implementation;
+ the documentation specifically requires %@ so we could warn on that *)
+ add_char c;
+ parse_char_set_after_char (str_ind + 1) end_ind c'
+
+ (* Manage range in char set (except if the '-' the last char before ']') *)
+ and parse_char_set_after_minus str_ind end_ind c =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | ']' ->
+ add_char c;
+ add_char '-';
+ str_ind + 1
+ | '%' ->
+ if str_ind + 1 = end_ind then unexpected_end_of_format end_ind;
+ begin match str.[str_ind + 1] with
+ | ('%' | '@') as c' ->
+ add_range c c';
+ parse_char_set_content (str_ind + 2) end_ind
+ | _ -> fail_single_percent str_ind
+ end
+ | c' ->
+ add_range c c';
+ parse_char_set_content (str_ind + 1) end_ind
+ in
+ let str_ind, reverse =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | '^' -> str_ind + 1, true
+ | _ -> str_ind, false in
+ let next_ind = parse_char_set_start str_ind end_ind in
+ let char_set = freeze_char_set char_set in
+ next_ind, (if reverse then rev_char_set char_set else char_set)
+
+ (* Consume all next spaces, raise an Failure if end_ind is reached. *)
+ and parse_spaces str_ind end_ind =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ if str.[str_ind] = ' ' then parse_spaces (str_ind + 1) end_ind else str_ind
+
+ (* Read a positive integer from the string, raise a Failure if end_ind is
+ reached. *)
+ and parse_positive str_ind end_ind acc =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | '0' .. '9' as c ->
+ let new_acc = acc * 10 + (int_of_char c - int_of_char '0') in
+ if new_acc > Sys.max_string_length then
+ failwith_message
+ "invalid format %S: integer %d is greater than the limit %d"
+ str new_acc Sys.max_string_length
+ else
+ parse_positive (str_ind + 1) end_ind new_acc
+ | _ -> str_ind, acc
+
+ (* Read a positive or negative integer from the string, raise a Failure
+ if end_ind is reached. *)
+ and parse_integer str_ind end_ind =
+ if str_ind = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind] with
+ | '0' .. '9' -> parse_positive str_ind end_ind 0
+ | '-' -> (
+ if str_ind + 1 = end_ind then unexpected_end_of_format end_ind;
+ match str.[str_ind + 1] with
+ | '0' .. '9' ->
+ let next_ind, n = parse_positive (str_ind + 1) end_ind 0 in
+ next_ind, -n
+ | c ->
+ expected_character (str_ind + 1) "digit" c
+ )
+ | _ -> assert false
+
+ (* Add a literal to a format from a literal character sub-sequence. *)
+ and add_literal : type a d e f .
+ int -> int -> (a, _, _, d, e, f) fmt ->
+ (_, _, e, f) fmt_ebb =
+ fun lit_start str_ind fmt -> match str_ind - lit_start with
+ | 0 -> Fmt_EBB fmt
+ | 1 -> Fmt_EBB (Char_literal (str.[lit_start], fmt))
+ | size -> Fmt_EBB (String_literal (String.sub str lit_start size, fmt))
+
+ (* Search the end of the current sub-format
+ (i.e. the corresponding "%}" or "%)") *)
+ and search_subformat_end str_ind end_ind c =
+ if str_ind = end_ind then
+ failwith_message
+ "invalid format %S: unclosed sub-format, \
+ expected \"%%%c\" at character number %d" str c end_ind;
+ match str.[str_ind] with
+ | '%' ->
+ if str_ind + 1 = end_ind then unexpected_end_of_format end_ind;
+ if str.[str_ind + 1] = c then (* End of format found *) str_ind else
+ begin match str.[str_ind + 1] with
+ | '_' ->
+ (* Search for "%_(" or "%_{". *)
+ if str_ind + 2 = end_ind then unexpected_end_of_format end_ind;
+ begin match str.[str_ind + 2] with
+ | '{' ->
+ let sub_end = search_subformat_end (str_ind + 3) end_ind '}' in
+ search_subformat_end (sub_end + 2) end_ind c
+ | '(' ->
+ let sub_end = search_subformat_end (str_ind + 3) end_ind ')' in
+ search_subformat_end (sub_end + 2) end_ind c
+ | _ -> search_subformat_end (str_ind + 3) end_ind c
+ end
+ | '{' ->
+ (* %{...%} sub-format found. *)
+ let sub_end = search_subformat_end (str_ind + 2) end_ind '}' in
+ search_subformat_end (sub_end + 2) end_ind c
+ | '(' ->
+ (* %(...%) sub-format found. *)
+ let sub_end = search_subformat_end (str_ind + 2) end_ind ')' in
+ search_subformat_end (sub_end + 2) end_ind c
+ | '}' ->
+ (* Error: %(...%}. *)
+ expected_character (str_ind + 1) "character ')'" '}';
+ | ')' ->
+ (* Error: %{...%). *)
+ expected_character (str_ind + 1) "character '}'" ')';
+ | _ ->
+ search_subformat_end (str_ind + 2) end_ind c
+ end
+ | _ -> search_subformat_end (str_ind + 1) end_ind c
+
+ (* Check if symb is a valid int conversion after "%l", "%n" or "%L" *)
+ and is_int_base symb = match symb with
+ | 'd' | 'i' | 'x' | 'X' | 'o' | 'u' -> true
+ | _ -> false
+
+ (* Convert a char (l, n or L) to its associated counter. *)
+ and counter_of_char symb = match symb with
+ | 'l' -> Line_counter | 'n' -> Char_counter
+ | 'L' -> Token_counter | _ -> assert false
+
+ (* Convert (plus, symb) to its associated int_conv. *)
+ and compute_int_conv pct_ind str_ind plus sharp space symb =
+ match plus, sharp, space, symb with
+ | false, false, false, 'd' -> Int_d | false, false, false, 'i' -> Int_i
+ | false, false, true, 'd' -> Int_sd | false, false, true, 'i' -> Int_si
+ | true, false, false, 'd' -> Int_pd | true, false, false, 'i' -> Int_pi
+ | false, false, false, 'x' -> Int_x | false, false, false, 'X' -> Int_X
+ | false, true, false, 'x' -> Int_Cx | false, true, false, 'X' -> Int_CX
+ | false, false, false, 'o' -> Int_o
+ | false, true, false, 'o' -> Int_Co
+ | false, false, false, 'u' -> Int_u
+ | _, true, _, 'x' when legacy_behavior -> Int_Cx
+ | _, true, _, 'X' when legacy_behavior -> Int_CX
+ | _, true, _, 'o' when legacy_behavior -> Int_Co
+ | _, true, _, _ ->
+ if legacy_behavior then (* ignore *)
+ compute_int_conv pct_ind str_ind plus false space symb
+ else incompatible_flag pct_ind str_ind symb "'#'"
+ | true, false, true, _ ->
+ if legacy_behavior then
+ (* plus and space: legacy implementation prefers plus *)
+ compute_int_conv pct_ind str_ind plus sharp false symb
+ else incompatible_flag pct_ind str_ind ' ' "'+'"
+ | false, false, true, _ ->
+ if legacy_behavior then (* ignore *)
+ compute_int_conv pct_ind str_ind plus sharp false symb
+ else incompatible_flag pct_ind str_ind symb "' '"
+ | true, false, false, _ ->
+ if legacy_behavior then (* ignore *)
+ compute_int_conv pct_ind str_ind false sharp space symb
+ else incompatible_flag pct_ind str_ind symb "'+'"
+ | false, false, false, _ -> assert false
+
+ (* Convert (plus, symb) to its associated float_conv. *)
+ and compute_float_conv pct_ind str_ind plus space symb =
+ match plus, space, symb with
+ | false, false, 'f' -> Float_f | false, false, 'e' -> Float_e
+ | false, true, 'f' -> Float_sf | false, true, 'e' -> Float_se
+ | true, false, 'f' -> Float_pf | true, false, 'e' -> Float_pe
+ | false, false, 'E' -> Float_E | false, false, 'g' -> Float_g
+ | false, true, 'E' -> Float_sE | false, true, 'g' -> Float_sg
+ | true, false, 'E' -> Float_pE | true, false, 'g' -> Float_pg
+ | false, false, 'G' -> Float_G
+ | false, true, 'G' -> Float_sG
+ | true, false, 'G' -> Float_pG
+ | false, false, 'F' -> Float_F
+ | true, true, _ ->
+ if legacy_behavior then
+ (* plus and space: legacy implementation prefers plus *)
+ compute_float_conv pct_ind str_ind plus false symb
+ else incompatible_flag pct_ind str_ind ' ' "'+'"
+ | false, true, _ ->
+ if legacy_behavior then (* ignore *)
+ compute_float_conv pct_ind str_ind plus false symb
+ else incompatible_flag pct_ind str_ind symb "' '"
+ | true, false, _ ->
+ if legacy_behavior then (* ignore *)
+ compute_float_conv pct_ind str_ind false space symb
+ else incompatible_flag pct_ind str_ind symb "'+'"
+ | false, false, _ -> assert false
+
+ (* Raise a Failure with a friendly error message about incompatible options.*)
+ and incompatible_flag : type a . int -> int -> char -> string -> a =
+ fun pct_ind str_ind symb option ->
+ let subfmt = String.sub str pct_ind (str_ind - pct_ind) in
+ failwith_message
+ "invalid format %S: at character number %d, \
+ %s is incompatible with '%c' in sub-format %S"
+ str pct_ind option symb subfmt;
+
+ in parse 0 (String.length str)
+
+(******************************************************************************)
+ (* Guarded string to format conversions *)
+
+(* Convert a string to a format according to an fmtty. *)
+(* Raise a Failure with an error message in case of type mismatch. *)
+let format_of_string_fmtty str fmtty =
+ let Fmt_EBB fmt = fmt_ebb_of_string str in
+ try Format (type_format fmt fmtty, str)
+ with Type_mismatch ->
+ failwith_message
+ "bad input: format type mismatch between %S and %S"
+ str (string_of_fmtty fmtty)
+
+(* Convert a string to a format compatible with an other format. *)
+(* Raise a Failure with an error message in case of type mismatch. *)
+let format_of_string_format str (Format (fmt', str')) =
+ let Fmt_EBB fmt = fmt_ebb_of_string str in
+ try Format (type_format fmt (fmtty_of_fmt fmt'), str)
+ with Type_mismatch ->
+ failwith_message
+ "bad input: format type mismatch between %S and %S" str str'
diff --git a/stdlib/camlinternalFormat.mli b/stdlib/camlinternalFormat.mli
new file mode 100644
index 000000000..dd8da62d2
--- /dev/null
+++ b/stdlib/camlinternalFormat.mli
@@ -0,0 +1,104 @@
+(* No comments, OCaml stdlib internal use only. *)
+
+open CamlinternalFormatBasics
+
+val is_in_char_set : char_set -> char -> bool
+val rev_char_set : char_set -> char_set
+
+type mutable_char_set = bytes
+val create_char_set : unit -> mutable_char_set
+val add_in_char_set : mutable_char_set -> char -> unit
+val freeze_char_set : mutable_char_set -> char_set
+
+type ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb = Param_format_EBB :
+ ('x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb
+
+val param_format_of_ignored_format :
+ ('a, 'b, 'c, 'd, 'y, 'x) ignored -> ('x, 'b, 'c, 'y, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb
+
+type ('b, 'c) acc_formatting_gen =
+ | Acc_open_tag of ('b, 'c) acc
+ | Acc_open_box of ('b, 'c) acc
+
+and ('b, 'c) acc =
+ | Acc_formatting_lit of ('b, 'c) acc * formatting_lit
+ | Acc_formatting_gen of ('b, 'c) acc * ('b, 'c) acc_formatting_gen
+ | Acc_string_literal of ('b, 'c) acc * string
+ | Acc_char_literal of ('b, 'c) acc * char
+ | Acc_data_string of ('b, 'c) acc * string
+ | Acc_data_char of ('b, 'c) acc * char
+ | Acc_delay of ('b, 'c) acc * ('b -> 'c)
+ | Acc_flush of ('b, 'c) acc
+ | Acc_invalid_arg of ('b, 'c) acc * string
+ | End_of_acc
+
+type ('a, 'b) heter_list =
+ | Cons : 'c * ('a, 'b) heter_list -> ('c -> 'a, 'b) heter_list
+ | Nil : ('b, 'b) heter_list
+
+type ('b, 'c, 'e, 'f) fmt_ebb = Fmt_EBB :
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt ->
+ ('b, 'c, 'e, 'f) fmt_ebb
+
+val make_printf :
+ ('b -> ('b, 'c) acc -> 'd) -> 'b -> ('b, 'c) acc ->
+ ('a, 'b, 'c, 'c, 'c, 'd) CamlinternalFormatBasics.fmt -> 'a
+
+val output_acc : out_channel -> (out_channel, unit) acc -> unit
+val bufput_acc : Buffer.t -> (Buffer.t, unit) acc -> unit
+val strput_acc : Buffer.t -> (unit, string) acc -> unit
+
+val type_format :
+ ('x, 'b, 'c, 't, 'u, 'v) CamlinternalFormatBasics.fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmtty ->
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt
+
+val fmt_ebb_of_string : ?legacy_behavior:bool -> string -> ('b, 'c, 'e, 'f) fmt_ebb
+(* warning: the optional flag legacy_behavior is EXPERIMENTAL and will
+ be removed in the next version. You must not set it explicitly. It
+ is only used by the type-checker implementation.
+*)
+
+val format_of_string_fmtty :
+ string ->
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmtty ->
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6
+
+val format_of_string_format :
+ string ->
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 ->
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6
+
+val char_of_iconv : CamlinternalFormatBasics.int_conv -> char
+val string_of_formatting_lit : CamlinternalFormatBasics.formatting_lit -> string
+val string_of_formatting_gen :
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.formatting_gen -> string
+
+val string_of_fmtty :
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmtty -> string
+val string_of_fmt :
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt -> string
+
+val open_box_of_string : string -> int * block_type
+
+val symm :
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+-> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2,
+ 'a1, 'b1, 'c1, 'd1, 'e1, 'f1) fmtty_rel
+
+val trans :
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+-> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2,
+ 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel
+-> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel
+
+val recast :
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1) fmt
+-> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+-> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmt
diff --git a/stdlib/camlinternalFormatBasics.ml b/stdlib/camlinternalFormatBasics.ml
new file mode 100644
index 000000000..e51e4e2ce
--- /dev/null
+++ b/stdlib/camlinternalFormatBasics.ml
@@ -0,0 +1,614 @@
+(* Padding position. *)
+type padty =
+ | Left (* Text is left justified ('-' option). *)
+ | Right (* Text is right justified (no '-' option). *)
+ | Zeros (* Text is right justified by zeros (see '0' option). *)
+
+(***)
+
+(* Integer conversion. *)
+type int_conv =
+ | Int_d | Int_pd | Int_sd (* %d | %+d | % d *)
+ | Int_i | Int_pi | Int_si (* %i | %+i | % i *)
+ | Int_x | Int_Cx (* %x | %#x *)
+ | Int_X | Int_CX (* %X | %#X *)
+ | Int_o | Int_Co (* %o | %#o *)
+ | Int_u (* %u *)
+
+(* Float conversion. *)
+type float_conv =
+ | Float_f | Float_pf | Float_sf (* %f | %+f | % f *)
+ | Float_e | Float_pe | Float_se (* %e | %+e | % e *)
+ | Float_E | Float_pE | Float_sE (* %E | %+E | % E *)
+ | Float_g | Float_pg | Float_sg (* %g | %+g | % g *)
+ | Float_G | Float_pG | Float_sG (* %G | %+G | % G *)
+ | Float_F (* %F *)
+
+(***)
+
+(* Char sets (see %[...]) are bitmaps implemented as 32-char strings. *)
+type char_set = string
+
+(***)
+
+(* Counter used in Scanf. *)
+type counter =
+ | Line_counter (* %l *)
+ | Char_counter (* %n *)
+ | Token_counter (* %N, %L *)
+
+(***)
+
+(* Padding of strings and numbers. *)
+type ('a, 'b) padding =
+ (* No padding (ex: "%d") *)
+ | No_padding : ('a, 'a) padding
+ (* Literal padding (ex: "%8d") *)
+ | Lit_padding : padty * int -> ('a, 'a) padding
+ (* Padding as extra argument (ex: "%*d") *)
+ | Arg_padding : padty -> (int -> 'a, 'a) padding
+
+(* Some formats, such as %_d,
+ only accept an optional number as padding option (no extra argument) *)
+type pad_option = int option
+
+(* Precision of floats and '0'-padding of integers. *)
+type ('a, 'b) precision =
+ (* No precision (ex: "%f") *)
+ | No_precision : ('a, 'a) precision
+ (* Literal precision (ex: "%.3f") *)
+ | Lit_precision : int -> ('a, 'a) precision
+ (* Precision as extra argument (ex: "%.*f") *)
+ | Arg_precision : (int -> 'a, 'a) precision
+
+(* Some formats, such as %_f,
+ only accept an optional number as precision option (no extra argument) *)
+type prec_option = int option
+
+(***)
+
+(* Relational format types
+
+In the first format+gadts implementation, the type for %(..%) in the
+fmt GADT was as follows:
+
+| Format_subst : (* %(...%) *)
+ pad_option * ('d1, 'q1, 'd2, 'q2) reader_nb_unifier *
+ ('x, 'b, 'c, 'd1, 'q1, 'u) fmtty *
+ ('u, 'b, 'c, 'q1, 'e1, 'f) fmt ->
+ (('x, 'b, 'c, 'd2, 'q2, 'u) format6 -> 'x, 'b, 'c, 'd1, 'e1, 'f) fmt
+
+Notice that the 'u parameter in 'f position in the format argument
+(('x, .., 'u) format6 -> ..) is equal to the 'u parameter in 'a
+position in the format tail (('u, .., 'f) fmt). This means that the
+type of the expected format parameter depends of where the %(...%)
+are in the format string:
+
+ # Printf.printf "%(%)";;
+ - : (unit, out_channel, unit, '_a, '_a, unit)
+ CamlinternalFormatBasics.format6 -> unit
+ = <fun>
+ # Printf.printf "%(%)%d";;
+ - : (int -> unit, out_channel, unit, '_a, '_a, int -> unit)
+ CamlinternalFormatBasics.format6 -> int -> unit
+ = <fun>
+
+On the contrary, the legacy typer gives a clever type that does not
+depend on the position of %(..%) in the format string. For example,
+%(%) will have the polymorphic type ('a, 'b, 'c, 'd, 'd, 'a): it can
+be concatenated to any format type, and only enforces the constraint
+that its 'a and 'f parameters are equal (no format arguments) and 'd
+and 'e are equal (no reader argument).
+
+The weakening of this parameter type in the GADT version broke user
+code (in fact it essentially made %(...%) unusable except at the last
+position of a format). In particular, the following would not work
+anymore:
+
+ fun sep ->
+ Format.printf "foo%(%)bar%(%)baz" sep sep
+
+As the type-checker would require two *incompatible* types for the %(%)
+in different positions.
+
+The solution to regain a general type for %(..%) is to generalize this
+technique, not only on the 'd, 'e parameters, but on all six
+parameters of a format: we introduce a "relational" type
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+whose values are proofs that ('a1, .., 'f1) and ('a2, .., 'f2) morally
+correspond to the same format type: 'a1 is obtained from 'f1,'b1,'c1
+in the exact same way that 'a2 is obtained from 'f2,'b2,'c2, etc.
+
+For example, the relation between two format types beginning with a Char
+parameter is as follows:
+
+| Char_ty : (* %c *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (char -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ char -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+
+In the general case, the term structure of fmtty_rel is (almost¹)
+isomorphic to the fmtty of the previous implementation: every
+constructor is re-read with a binary, relational type, instead of the
+previous unary typing. fmtty can then be re-defined as the diagonal of
+fmtty_rel:
+
+ type ('a, 'b, 'c, 'd, 'e, 'f) fmtty =
+ ('a, 'b, 'c, 'd, 'e, 'f,
+ 'a, 'b, 'c, 'd, 'e, 'f) fmtty_rel
+
+Once we have this fmtty_rel type in place, we can give the more
+general type to %(...%):
+
+| Format_subst : (* %(...%) *)
+ pad_option *
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g2, 'b, 'c, 'j2, 'd, 'a) fmtty_rel *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b, 'c, 'j2, 'e, 'f) fmt
+
+We accept any format (('g, 'h, 'i, 'j, 'k, 'l) format6) (this is
+completely unrelated to the type of the current format), but also
+require a proof that this format is in relation to another format that
+is concatenable to the format tail. When executing a %(...%) format
+(in camlinternalFormat.ml:make_printf or scanf.ml:make_scanf), we
+transtype the format along this relation using the 'recast' function
+to transpose between related format types.
+
+ val recast :
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1) fmt
+ -> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ -> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmt
+
+NOTE ¹: the typing of Format_subst_ty requires not one format type, but
+two, one to establish the link between the format argument and the
+first six parameters, and the other for the link between the format
+argumant and the last six parameters.
+
+| Format_subst_ty : (* %(...%) *)
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g1, 'b1, 'c1, 'j1, 'd1, 'a1) fmtty_rel *
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel *
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g1, 'b1, 'c1, 'j1, 'e1, 'f1,
+ ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel
+
+When we generate a format AST, we generate exactly the same witness
+for both relations, and the witness-conversion functions in
+camlinternalFormat do rely on this invariant. For example, the
+function that proves that the relation is transitive
+
+ val trans :
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ -> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2,
+ 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel
+ -> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel
+
+does assume that the two input have exactly the same term structure
+(and is only every used for argument witnesses of the
+Format_subst_ty constructor).
+*)
+
+(* Type of a block used by the Format pretty-printer. *)
+type block_type =
+ | Pp_hbox (* Horizontal block no line breaking *)
+ | Pp_vbox (* Vertical block each break leads to a new line *)
+ | Pp_hvbox (* Horizontal-vertical block: same as vbox, except if this block
+ is small enough to fit on a single line *)
+ | Pp_hovbox (* Horizontal or Vertical block: breaks lead to new line
+ only when necessary to print the content of the block *)
+ | Pp_box (* Horizontal or Indent block: breaks lead to new line
+ only when necessary to print the content of the block, or
+ when it leads to a new indentation of the current line *)
+ | Pp_fits (* Internal usage: when a block fits on a single line *)
+
+(* Formatting element used by the Format pretty-printter. *)
+type formatting_lit =
+ | Close_box (* @] *)
+ | Close_tag (* @} *)
+ | Break of string * int * int (* @, | @ | @; | @;<> *)
+ | FFlush (* @? *)
+ | Force_newline (* @\n *)
+ | Flush_newline (* @. *)
+ | Magic_size of string * int (* @<n> *)
+ | Escaped_at (* @@ *)
+ | Escaped_percent (* @%% *)
+ | Scan_indic of char (* @X *)
+
+(* Formatting element used by the Format pretty-printter. *)
+type ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen =
+ | Open_tag : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> (* @{ *)
+ ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen
+ | Open_box : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> (* @[ *)
+ ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen
+
+(***)
+
+(* List of format type elements. *)
+(* In particular used to represent %(...%) and %{...%} contents. *)
+and ('a, 'b, 'c, 'd, 'e, 'f) fmtty =
+ ('a, 'b, 'c, 'd, 'e, 'f,
+ 'a, 'b, 'c, 'd, 'e, 'f) fmtty_rel
+and ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel =
+ | Char_ty : (* %c *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (char -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ char -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | String_ty : (* %s *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (string -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ string -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Int_ty : (* %d *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (int -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ int -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Int32_ty : (* %ld *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (int32 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ int32 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Nativeint_ty : (* %nd *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (nativeint -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ nativeint -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Int64_ty : (* %Ld *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (int64 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ int64 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Float_ty : (* %f *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (float -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ float -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Bool_ty : (* %B *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (bool -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ bool -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+
+ | Format_arg_ty : (* %{...%} *)
+ ('g, 'h, 'i, 'j, 'k, 'l) fmtty *
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Format_subst_ty : (* %(...%) *)
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g1, 'b1, 'c1, 'j1, 'd1, 'a1) fmtty_rel *
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel *
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g1, 'b1, 'c1, 'j1, 'e1, 'f1,
+ ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel
+
+ (* Printf and Format specific constructors. *)
+ | Alpha_ty : (* %a *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('b1 -> 'x -> 'c1) -> 'x -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ ('b2 -> 'x -> 'c2) -> 'x -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+ | Theta_ty : (* %t *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('b1 -> 'c1) -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ ('b2 -> 'c2) -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+
+ (* Scanf specific constructor. *)
+ | Reader_ty : (* %r *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ ('x -> 'a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1,
+ 'x -> 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel
+ | Ignored_reader_ty : (* %_r *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ ('a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel
+
+ | End_of_fmtty :
+ ('f1, 'b1, 'c1, 'd1, 'd1, 'f1,
+ 'f2, 'b2, 'c2, 'd2, 'd2, 'f2) fmtty_rel
+
+(***)
+
+(* List of format elements. *)
+and ('a, 'b, 'c, 'd, 'e, 'f) fmt =
+ | Char : (* %c *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Caml_char : (* %C *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+ | String : (* %s *)
+ ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+ | Caml_string : (* %S *)
+ ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+ | Int : (* %[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, int -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+ | Int32 : (* %l[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, int32 -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+ | Nativeint : (* %n[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, nativeint -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+ | Int64 : (* %L[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, int64 -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+ | Float : (* %[feEgGF] *)
+ float_conv * ('x, 'y) padding * ('y, float -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+ | Bool : (* %[bB] *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (bool -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Flush : (* %! *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+
+ | String_literal : (* abc *)
+ string * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Char_literal : (* x *)
+ char * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+
+ | Format_arg : (* %{...%} *)
+ pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Format_subst : (* %(...%) *)
+ pad_option *
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g2, 'b, 'c, 'j2, 'd, 'a) fmtty_rel *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b, 'c, 'j2, 'e, 'f) fmt
+
+ (* Printf and Format specific constructor. *)
+ | Alpha : (* %a *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('b -> 'x -> 'c) -> 'x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Theta : (* %t *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('b -> 'c) -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+
+ (* Format specific constructor: *)
+ | Formatting_lit : (* @_ *)
+ formatting_lit * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Formatting_gen : (* @_ *)
+ ('a1, 'b, 'c, 'd1, 'e1, 'f1) formatting_gen *
+ ('f1, 'b, 'c, 'e1, 'e2, 'f2) fmt -> ('a1, 'b, 'c, 'd1, 'e2, 'f2) fmt
+
+ (* Scanf specific constructors: *)
+ | Reader : (* %r *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x -> 'a, 'b, 'c, ('b -> 'x) -> 'd, 'e, 'f) fmt
+ | Scan_char_set : (* %[...] *)
+ pad_option * char_set * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (string -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Scan_get_counter : (* %[nlNL] *)
+ counter * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (int -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+ | Ignored_param : (* %_ *)
+ ('a, 'b, 'c, 'd, 'y, 'x) ignored * ('x, 'b, 'c, 'y, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+
+ | End_of_format :
+ ('f, 'b, 'c, 'e, 'e, 'f) fmt
+
+(***)
+
+(* Type for ignored parameters (see "%_"). *)
+and ('a, 'b, 'c, 'd, 'e, 'f) ignored =
+ | Ignored_char : (* %_c *)
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_caml_char : (* %_C *)
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_string : (* %_s *)
+ pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_caml_string : (* %_S *)
+ pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_int : (* %_d *)
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_int32 : (* %_ld *)
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_nativeint : (* %_nd *)
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_int64 : (* %_Ld *)
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_float : (* %_f *)
+ pad_option * prec_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_bool : (* %_B *)
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_format_arg : (* %_{...%} *)
+ pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty ->
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_format_subst : (* %_(...%) *)
+ pad_option * ('a, 'b, 'c, 'd, 'e, 'f) fmtty ->
+ ('a, 'b, 'c, 'd, 'e, 'f) ignored
+ | Ignored_reader : (* %_r *)
+ ('a, 'b, 'c, ('b -> 'x) -> 'd, 'd, 'a) ignored
+ | Ignored_scan_char_set : (* %_[...] *)
+ pad_option * char_set -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_scan_get_counter : (* %_[nlNL] *)
+ counter -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+
+and ('a, 'b, 'c, 'd, 'e, 'f) format6 =
+ Format of ('a, 'b, 'c, 'd, 'e, 'f) fmt * string
+
+let rec erase_rel : type a b c d e f g h i j k l .
+ (a, b, c, d, e, f,
+ g, h, i, j, k, l) fmtty_rel -> (a, b, c, d, e, f) fmtty
+= function
+ | Char_ty rest ->
+ Char_ty (erase_rel rest)
+ | String_ty rest ->
+ String_ty (erase_rel rest)
+ | Int_ty rest ->
+ Int_ty (erase_rel rest)
+ | Int32_ty rest ->
+ Int32_ty (erase_rel rest)
+ | Int64_ty rest ->
+ Int64_ty (erase_rel rest)
+ | Nativeint_ty rest ->
+ Nativeint_ty (erase_rel rest)
+ | Float_ty rest ->
+ Float_ty (erase_rel rest)
+ | Bool_ty rest ->
+ Bool_ty (erase_rel rest)
+ | Format_arg_ty (ty, rest) ->
+ Format_arg_ty (ty, erase_rel rest)
+ | Format_subst_ty (ty1, ty2, rest) ->
+ Format_subst_ty (ty1, ty1, erase_rel rest)
+ | Alpha_ty rest ->
+ Alpha_ty (erase_rel rest)
+ | Theta_ty rest ->
+ Theta_ty (erase_rel rest)
+ | Reader_ty rest ->
+ Reader_ty (erase_rel rest)
+ | Ignored_reader_ty rest ->
+ Ignored_reader_ty (erase_rel rest)
+ | End_of_fmtty -> End_of_fmtty
+
+(******************************************************************************)
+ (* Format type concatenation *)
+
+(* Concatenate two format types. *)
+(* Used by:
+ * reader_nb_unifier_of_fmtty to count readers in an fmtty,
+ * Scanf.take_fmtty_format_readers to extract readers inside %(...%),
+ * CamlinternalFormat.fmtty_of_ignored_format to extract format type. *)
+
+(*
+let rec concat_fmtty : type a b c d e f g h .
+ (a, b, c, d, e, f) fmtty ->
+ (f, b, c, e, g, h) fmtty ->
+ (a, b, c, d, g, h) fmtty =
+*)
+let rec concat_fmtty :
+ type a1 b1 c1 d1 e1 f1
+ a2 b2 c2 d2 e2 f2
+ g1 j1 g2 j2
+ .
+ (g1, b1, c1, j1, d1, a1,
+ g2, b2, c2, j2, d2, a2) fmtty_rel ->
+ (a1, b1, c1, d1, e1, f1,
+ a2, b2, c2, d2, e2, f2) fmtty_rel ->
+ (g1, b1, c1, j1, e1, f1,
+ g2, b2, c2, j2, e2, f2) fmtty_rel =
+fun fmtty1 fmtty2 -> match fmtty1 with
+ | Char_ty rest ->
+ Char_ty (concat_fmtty rest fmtty2)
+ | String_ty rest ->
+ String_ty (concat_fmtty rest fmtty2)
+ | Int_ty rest ->
+ Int_ty (concat_fmtty rest fmtty2)
+ | Int32_ty rest ->
+ Int32_ty (concat_fmtty rest fmtty2)
+ | Nativeint_ty rest ->
+ Nativeint_ty (concat_fmtty rest fmtty2)
+ | Int64_ty rest ->
+ Int64_ty (concat_fmtty rest fmtty2)
+ | Float_ty rest ->
+ Float_ty (concat_fmtty rest fmtty2)
+ | Bool_ty rest ->
+ Bool_ty (concat_fmtty rest fmtty2)
+ | Alpha_ty rest ->
+ Alpha_ty (concat_fmtty rest fmtty2)
+ | Theta_ty rest ->
+ Theta_ty (concat_fmtty rest fmtty2)
+ | Reader_ty rest ->
+ Reader_ty (concat_fmtty rest fmtty2)
+ | Ignored_reader_ty rest ->
+ Ignored_reader_ty (concat_fmtty rest fmtty2)
+ | Format_arg_ty (ty, rest) ->
+ Format_arg_ty (ty, concat_fmtty rest fmtty2)
+ | Format_subst_ty (ty1, ty2, rest) ->
+ Format_subst_ty (ty1, ty2, concat_fmtty rest fmtty2)
+ | End_of_fmtty -> fmtty2
+
+(******************************************************************************)
+ (* Format concatenation *)
+
+(* Concatenate two formats. *)
+let rec concat_fmt : type a b c d e f g h .
+ (a, b, c, d, e, f) fmt ->
+ (f, b, c, e, g, h) fmt ->
+ (a, b, c, d, g, h) fmt =
+fun fmt1 fmt2 -> match fmt1 with
+ | String (pad, rest) ->
+ String (pad, concat_fmt rest fmt2)
+ | Caml_string (pad, rest) ->
+ Caml_string (pad, concat_fmt rest fmt2)
+
+ | Int (iconv, pad, prec, rest) ->
+ Int (iconv, pad, prec, concat_fmt rest fmt2)
+ | Int32 (iconv, pad, prec, rest) ->
+ Int32 (iconv, pad, prec, concat_fmt rest fmt2)
+ | Nativeint (iconv, pad, prec, rest) ->
+ Nativeint (iconv, pad, prec, concat_fmt rest fmt2)
+ | Int64 (iconv, pad, prec, rest) ->
+ Int64 (iconv, pad, prec, concat_fmt rest fmt2)
+ | Float (fconv, pad, prec, rest) ->
+ Float (fconv, pad, prec, concat_fmt rest fmt2)
+
+ | Char (rest) ->
+ Char (concat_fmt rest fmt2)
+ | Caml_char rest ->
+ Caml_char (concat_fmt rest fmt2)
+ | Bool rest ->
+ Bool (concat_fmt rest fmt2)
+ | Alpha rest ->
+ Alpha (concat_fmt rest fmt2)
+ | Theta rest ->
+ Theta (concat_fmt rest fmt2)
+ | Reader rest ->
+ Reader (concat_fmt rest fmt2)
+ | Flush rest ->
+ Flush (concat_fmt rest fmt2)
+
+ | String_literal (str, rest) ->
+ String_literal (str, concat_fmt rest fmt2)
+ | Char_literal (chr, rest) ->
+ Char_literal (chr, concat_fmt rest fmt2)
+
+ | Format_arg (pad, fmtty, rest) ->
+ Format_arg (pad, fmtty, concat_fmt rest fmt2)
+ | Format_subst (pad, fmtty, rest) ->
+ Format_subst (pad, fmtty, concat_fmt rest fmt2)
+
+ | Scan_char_set (width_opt, char_set, rest) ->
+ Scan_char_set (width_opt, char_set, concat_fmt rest fmt2)
+ | Scan_get_counter (counter, rest) ->
+ Scan_get_counter (counter, concat_fmt rest fmt2)
+ | Ignored_param (ign, rest) ->
+ Ignored_param (ign, concat_fmt rest fmt2)
+
+ | Formatting_lit (fmting_lit, rest) ->
+ Formatting_lit (fmting_lit, concat_fmt rest fmt2)
+ | Formatting_gen (fmting_gen, rest) ->
+ Formatting_gen (fmting_gen, concat_fmt rest fmt2)
+
+ | End_of_format ->
+ fmt2
diff --git a/stdlib/camlinternalFormatBasics.mli b/stdlib/camlinternalFormatBasics.mli
new file mode 100644
index 000000000..52f428ad8
--- /dev/null
+++ b/stdlib/camlinternalFormatBasics.mli
@@ -0,0 +1,287 @@
+(* No comments, OCaml stdlib internal use only. *)
+
+type padty = Left | Right | Zeros
+
+type int_conv =
+ | Int_d | Int_pd | Int_sd | Int_i | Int_pi | Int_si
+ | Int_x | Int_Cx | Int_X | Int_CX | Int_o | Int_Co | Int_u
+
+type float_conv =
+ | Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se
+ | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg
+ | Float_G | Float_pG | Float_sG | Float_F
+
+type char_set = string
+
+type counter = Line_counter | Char_counter | Token_counter
+
+type ('a, 'b) padding =
+ | No_padding : ('a, 'a) padding
+ | Lit_padding : padty * int -> ('a, 'a) padding
+ | Arg_padding : padty -> (int -> 'a, 'a) padding
+
+type pad_option = int option
+
+type ('a, 'b) precision =
+ | No_precision : ('a, 'a) precision
+ | Lit_precision : int -> ('a, 'a) precision
+ | Arg_precision : (int -> 'a, 'a) precision
+
+type prec_option = int option
+
+type block_type = Pp_hbox | Pp_vbox | Pp_hvbox | Pp_hovbox | Pp_box | Pp_fits
+
+type formatting_lit =
+ | Close_box
+ | Close_tag
+ | Break of string * int * int
+ | FFlush
+ | Force_newline
+ | Flush_newline
+ | Magic_size of string * int
+ | Escaped_at
+ | Escaped_percent
+ | Scan_indic of char
+
+type ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen =
+ | Open_tag : ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
+ ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen
+ | Open_box : ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
+ ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen
+
+and ('a, 'b, 'c, 'd, 'e, 'f) fmtty =
+ ('a, 'b, 'c, 'd, 'e, 'f,
+ 'a, 'b, 'c, 'd, 'e, 'f) fmtty_rel
+and ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel =
+| Char_ty : (* %c *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (char -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ char -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| String_ty : (* %s *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (string -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ string -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Int_ty : (* %d *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (int -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ int -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Int32_ty : (* %ld *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (int32 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ int32 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Nativeint_ty : (* %nd *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (nativeint -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ nativeint -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Int64_ty : (* %Ld *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (int64 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ int64 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Float_ty : (* %f *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (float -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ float -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Bool_ty : (* %B *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (bool -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ bool -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Format_arg_ty : (* %{...%} *)
+ ('g, 'h, 'i, 'j, 'k, 'l) fmtty *
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Format_subst_ty : (* %(...%) *)
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g1, 'b1, 'c1, 'j1, 'd1, 'a1) fmtty_rel *
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel *
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g1, 'b1, 'c1, 'j1, 'e1, 'f1,
+ ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel
+
+(* Printf and Format specific constructors. *)
+| Alpha_ty : (* %a *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('b1 -> 'x -> 'c1) -> 'x -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ ('b2 -> 'x -> 'c2) -> 'x -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+| Theta_ty : (* %t *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ (('b1 -> 'c1) -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ ('b2 -> 'c2) -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel
+
+(* Scanf specific constructor. *)
+| Reader_ty : (* %r *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ ('x -> 'a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1,
+ 'x -> 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel
+| Ignored_reader_ty : (* %_r *)
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ ('a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel
+
+| End_of_fmtty :
+ ('f1, 'b1, 'c1, 'd1, 'd1, 'f1,
+ 'f2, 'b2, 'c2, 'd2, 'd2, 'f2) fmtty_rel
+
+(**)
+
+(** List of format elements. *)
+and ('a, 'b, 'c, 'd, 'e, 'f) fmt =
+| Char : (* %c *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+| Caml_char : (* %C *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+| String : (* %s *)
+ ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+| Caml_string : (* %S *)
+ ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+| Int : (* %[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, int -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+| Int32 : (* %l[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, int32 -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+| Nativeint : (* %n[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, nativeint -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+| Int64 : (* %L[dixXuo] *)
+ int_conv * ('x, 'y) padding * ('y, int64 -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+| Float : (* %[feEgGF] *)
+ float_conv * ('x, 'y) padding * ('y, float -> 'a) precision *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x, 'b, 'c, 'd, 'e, 'f) fmt
+| Bool : (* %[bB] *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (bool -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+| Flush : (* %! *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+
+| String_literal : (* abc *)
+ string * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+| Char_literal : (* x *)
+ char * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+
+| Format_arg : (* %{...%} *)
+ pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+| Format_subst : (* %(...%) *)
+ pad_option *
+ ('g, 'h, 'i, 'j, 'k, 'l,
+ 'g2, 'b, 'c, 'j2, 'd, 'a) fmtty_rel *
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b, 'c, 'j2, 'e, 'f) fmt
+
+(* Printf and Format specific constructor. *)
+| Alpha : (* %a *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('b -> 'x -> 'c) -> 'x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+| Theta : (* %t *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (('b -> 'c) -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+
+(* Format specific constructor: *)
+| Formatting_lit : (* @_ *)
+ formatting_lit * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+| Formatting_gen : (* @_ *)
+ ('a1, 'b, 'c, 'd1, 'e1, 'f1) formatting_gen *
+ ('f1, 'b, 'c, 'e1, 'e2, 'f2) fmt -> ('a1, 'b, 'c, 'd1, 'e2, 'f2) fmt
+
+(* Scanf specific constructors: *)
+| Reader : (* %r *)
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('x -> 'a, 'b, 'c, ('b -> 'x) -> 'd, 'e, 'f) fmt
+| Scan_char_set : (* %[...] *)
+ pad_option * char_set * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (string -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+| Scan_get_counter : (* %[nlNL] *)
+ counter * ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ (int -> 'a, 'b, 'c, 'd, 'e, 'f) fmt
+| Ignored_param : (* %_ *)
+ ('a, 'b, 'c, 'd, 'y, 'x) ignored * ('x, 'b, 'c, 'y, 'e, 'f) fmt ->
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt
+
+| End_of_format :
+ ('f, 'b, 'c, 'e, 'e, 'f) fmt
+
+and ('a, 'b, 'c, 'd, 'e, 'f) ignored =
+ | Ignored_char :
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_caml_char :
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_string :
+ pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_caml_string :
+ pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_int :
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_int32 :
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_nativeint :
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_int64 :
+ int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_float :
+ pad_option * prec_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_bool :
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_format_arg :
+ pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty ->
+ ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_format_subst :
+ pad_option * ('a, 'b, 'c, 'd, 'e, 'f) fmtty ->
+ ('a, 'b, 'c, 'd, 'e, 'f) ignored
+ | Ignored_reader :
+ ('a, 'b, 'c, ('b -> 'x) -> 'd, 'd, 'a) ignored
+ | Ignored_scan_char_set :
+ pad_option * char_set -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+ | Ignored_scan_get_counter :
+ counter -> ('a, 'b, 'c, 'd, 'd, 'a) ignored
+
+and ('a, 'b, 'c, 'd, 'e, 'f) format6 =
+ Format of ('a, 'b, 'c, 'd, 'e, 'f) fmt * string
+
+val concat_fmtty :
+ ('g1, 'b1, 'c1, 'j1, 'd1, 'a1,
+ 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel ->
+ ('a1, 'b1, 'c1, 'd1, 'e1, 'f1,
+ 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel ->
+ ('g1, 'b1, 'c1, 'j1, 'e1, 'f1,
+ 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel
+
+val erase_rel :
+ ('a, 'b, 'c, 'd, 'e, 'f,
+ 'g, 'h, 'i, 'j, 'k, 'l) fmtty_rel -> ('a, 'b, 'c, 'd, 'e, 'f) fmtty
+
+val concat_fmt :
+ ('a, 'b, 'c, 'd, 'e, 'f) fmt ->
+ ('f, 'b, 'c, 'e, 'g, 'h) fmt ->
+ ('a, 'b, 'c, 'd, 'g, 'h) fmt
diff --git a/stdlib/camlinternalOO.ml b/stdlib/camlinternalOO.ml
index 872a56065..3c39c0b67 100644
--- a/stdlib/camlinternalOO.ml
+++ b/stdlib/camlinternalOO.ml
@@ -128,7 +128,7 @@ let rec fit_size n =
let new_table pub_labels =
incr table_count;
let len = Array.length pub_labels in
- let methods = Array.create (len*2+2) dummy_met in
+ let methods = Array.make (len*2+2) dummy_met in
methods.(0) <- magic len;
methods.(1) <- magic (fit_size len * Sys.word_size / 8 - 1);
for i = 0 to len - 1 do methods.(i*2+3) <- magic pub_labels.(i) done;
@@ -144,7 +144,7 @@ let new_table pub_labels =
let resize array new_size =
let old_size = Array.length array.methods in
if new_size > old_size then begin
- let new_buck = Array.create new_size dummy_met in
+ let new_buck = Array.make new_size dummy_met in
Array.blit array.methods 0 new_buck 0 old_size;
array.methods <- new_buck
end
@@ -267,7 +267,7 @@ let to_array arr =
let new_methods_variables table meths vals =
let meths = to_array meths in
let nmeths = Array.length meths and nvals = Array.length vals in
- let res = Array.create (nmeths + nvals) 0 in
+ let res = Array.make (nmeths + nvals) 0 in
for i = 0 to nmeths - 1 do
res.(i) <- get_method_label table meths.(i)
done;
diff --git a/stdlib/digest.ml b/stdlib/digest.ml
index 2baf3dbfa..14cb4ebd9 100644
--- a/stdlib/digest.ml
+++ b/stdlib/digest.ml
@@ -23,11 +23,15 @@ external channel: in_channel -> int -> t = "caml_md5_chan"
let string str =
unsafe_string str 0 (String.length str)
+let bytes b = string (Bytes.unsafe_to_string b)
+
let substring str ofs len =
if ofs < 0 || len < 0 || ofs > String.length str - len
then invalid_arg "Digest.substring"
else unsafe_string str ofs len
+let subbytes b ofs len = substring (Bytes.unsafe_to_string b) ofs len
+
let file filename =
let ic = open_in_bin filename in
let d = channel ic (-1) in
@@ -35,23 +39,21 @@ let file filename =
d
let output chan digest =
- output chan digest 0 16
+ output_string chan digest
-let input chan =
- let digest = String.create 16 in
- really_input chan digest 0 16;
- digest
+let input chan = really_input_string chan 16
-let char_hex n = Char.unsafe_chr (n + if n < 10 then Char.code '0' else (Char.code 'a' - 10))
+let char_hex n =
+ Char.unsafe_chr (n + if n < 10 then Char.code '0' else (Char.code 'a' - 10))
let to_hex d =
- let result = String.create 32 in
+ let result = Bytes.create 32 in
for i = 0 to 15 do
let x = Char.code d.[i] in
- String.unsafe_set result (i*2) (char_hex (x lsr 4));
- String.unsafe_set result (i*2+1) (char_hex (x land 0x0f));
+ Bytes.unsafe_set result (i*2) (char_hex (x lsr 4));
+ Bytes.unsafe_set result (i*2+1) (char_hex (x land 0x0f));
done;
- result
+ Bytes.unsafe_to_string result
let from_hex s =
if String.length s <> 32 then raise (Invalid_argument "Digest.from_hex");
@@ -63,8 +65,8 @@ let from_hex s =
| _ -> raise (Invalid_argument "Digest.from_hex")
in
let byte i = digit s.[i] lsl 4 + digit s.[i+1] in
- let result = String.create 16 in
+ let result = Bytes.create 16 in
for i = 0 to 15 do
- result.[i] <- Char.chr (byte (2 * i));
+ Bytes.set result i (Char.chr (byte (2 * i)));
done;
- result
+ Bytes.unsafe_to_string result
diff --git a/stdlib/digest.mli b/stdlib/digest.mli
index 7fa1f15d6..583d2a46b 100644
--- a/stdlib/digest.mli
+++ b/stdlib/digest.mli
@@ -36,10 +36,16 @@ val compare : t -> t -> int
val string : string -> t
(** Return the digest of the given string. *)
+val bytes : bytes -> t
+(** Return the digest of the given byte sequence. *)
+
val substring : string -> int -> int -> t
(** [Digest.substring s ofs len] returns the digest of the substring
- of [s] starting at character number [ofs] and containing [len]
- characters. *)
+ of [s] starting at index [ofs] and containing [len] characters. *)
+
+val subbytes : bytes -> int -> int -> t
+(** [Digest.subbytes s ofs len] returns the digest of the subsequence
+ of [s] starting at index [ofs] and containing [len] bytes. *)
external channel : in_channel -> int -> t = "caml_md5_chan"
(** If [len] is nonnegative, [Digest.channel ic len] reads [len]
diff --git a/stdlib/filename.mli b/stdlib/filename.mli
index c44c6d954..a4ea3aaab 100644
--- a/stdlib/filename.mli
+++ b/stdlib/filename.mli
@@ -117,14 +117,13 @@ val set_temp_dir_name : string -> unit
@since 4.00.0
*)
-val temp_dir_name : string
-(** @deprecated The name of the initial temporary directory:
+val temp_dir_name : string [@@ocaml.deprecated]
+(** The name of the initial temporary directory:
Under Unix, the value of the [TMPDIR] environment variable, or "/tmp"
if the variable is not set.
Under Windows, the value of the [TEMP] environment variable, or "."
if the variable is not set.
- This function is deprecated; {!Filename.get_temp_dir_name} should be
- used instead.
+ @deprecated You should use {!Filename.get_temp_dir_name} instead.
@since 3.09.1
*)
diff --git a/stdlib/format.ml b/stdlib/format.ml
index 02222932e..5e206e11f 100644
--- a/stdlib/format.ml
+++ b/stdlib/format.ml
@@ -29,6 +29,10 @@ external int_of_size : size -> int = "%identity"
(* Tokens are one of the following : *)
+type block_type
+ = CamlinternalFormatBasics.block_type
+ = Pp_hbox | Pp_vbox | Pp_hvbox | Pp_hovbox | Pp_box | Pp_fits
+
type pp_token =
| Pp_text of string (* normal text *)
| Pp_break of int * int (* complete break *)
@@ -46,21 +50,7 @@ type pp_token =
and tag = string
-and block_type =
-| Pp_hbox (* Horizontal block no line breaking *)
-| Pp_vbox (* Vertical block each break leads to a new line *)
-| Pp_hvbox (* Horizontal-vertical block: same as vbox, except if this block
- is small enough to fit on a single line *)
-| Pp_hovbox (* Horizontal or Vertical block: breaks lead to new line
- only when necessary to print the content of the block *)
-| Pp_box (* Horizontal or Indent block: breaks lead to new line
- only when necessary to print the content of the block, or
- when it leads to a new indentation of the current line *)
-| Pp_fits (* Internal usage: when a block fits on a single line *)
-
-and tblock =
- | Pp_tbox of int list ref (* Tabulation box *)
-;;
+and tblock = Pp_tbox of int list ref (* Tabulation box *)
(* The Queue:
contains all formatting elements.
@@ -241,7 +231,6 @@ let pp_infinity = 1000000010;;
let pp_output_string state s = state.pp_out_string s 0 (String.length s)
and pp_output_newline state = state.pp_out_newline ()
and pp_output_spaces state n = state.pp_out_spaces n
-;;
(* To format a break, indenting a new line. *)
let break_new_line state offset width =
@@ -657,9 +646,7 @@ let pp_print_bool state b = pp_print_string state (string_of_bool b);;
(* To format a char. *)
let pp_print_char state c =
- let s = String.create 1 in
- s.[0] <- c;
- pp_print_as state 1 s
+ pp_print_as state 1 (String.make 1 c)
;;
(* Opening boxes. *)
@@ -901,7 +888,7 @@ let rec display_blanks state n =
;;
let pp_set_formatter_out_channel state os =
- state.pp_out_string <- output os;
+ state.pp_out_string <- output_substring os;
state.pp_out_flush <- (fun () -> flush os);
state.pp_out_newline <- display_newline state;
state.pp_out_spaces <- display_blanks state;
@@ -967,7 +954,7 @@ let make_formatter output flush =
;;
let formatter_of_out_channel oc =
- make_formatter (output oc) (fun () -> flush oc)
+ make_formatter (output_substring oc) (fun () -> flush oc)
;;
let formatter_of_buffer b =
@@ -1071,309 +1058,106 @@ and set_tags =
pp_set_tags std_formatter
;;
-
-(**************************************************************
-
- Printf implementation.
-
- **************************************************************)
-
-module Sformat = Printf.CamlinternalPr.Sformat;;
-module Tformat = Printf.CamlinternalPr.Tformat;;
-
-(* Error messages when processing formats. *)
-
-(* Trailer: giving up at character number ... *)
-let giving_up mess fmt i =
- Printf.sprintf
- "Format.fprintf: %s \'%s\', giving up at character number %d%s"
- mess (Sformat.to_string fmt) i
- (if i < Sformat.length fmt
- then Printf.sprintf " (%c)." (Sformat.get fmt i)
- else Printf.sprintf "%c" '.')
-;;
-
-(* When an invalid format deserves a special error explanation. *)
-let format_invalid_arg mess fmt i = invalid_arg (giving_up mess fmt i);;
-
-(* Standard invalid format. *)
-let invalid_format fmt i = format_invalid_arg "bad format" fmt i;;
-
-(* Cannot find a valid integer into that format. *)
-let invalid_integer fmt i =
- invalid_arg (giving_up "bad integer specification" fmt i);;
-
-(* Finding an integer size out of a sub-string of the format. *)
-let format_int_of_string fmt i s =
- let sz =
- try int_of_string s with
- | Failure _ -> invalid_integer fmt i in
- size_of_int sz
-;;
-
-(* Getting strings out of buffers. *)
-let get_buffer_out b =
- let s = Buffer.contents b in
- Buffer.reset b;
- s
-;;
-
-(* [ppf] is supposed to be a pretty-printer that outputs to buffer [b]:
- to extract the contents of [ppf] as a string we flush [ppf] and get the
- string out of [b]. *)
-let string_out b ppf =
- pp_flush_queue ppf false;
- get_buffer_out b
-;;
-
-(* Applies [printer] to a formatter that outputs on a fresh buffer,
- then returns the resulting material. *)
-let exstring printer arg =
- let b = Buffer.create 512 in
- let ppf = formatter_of_buffer b in
- printer ppf arg;
- string_out b ppf
-;;
-
-(* To turn out a character accumulator into the proper string result. *)
-let implode_rev s0 = function
- | [] -> s0
- | l -> String.concat "" (List.rev (s0 :: l))
-;;
-
-(* [mkprintf] is the printf-like function generator: given the
- - [to_s] flag that tells if we are printing into a string,
- - the [get_out] function that has to be called to get a [ppf] function to
- output onto,
- it generates a [kprintf] function that takes as arguments a [k]
- continuation function to be called at the end of formatting,
- and a printing format string to print the rest of the arguments
- according to the format string.
- Regular [fprintf]-like functions of this module are obtained via partial
- applications of [mkprintf]. *)
-let mkprintf to_s get_out k fmt =
-
- (* [out] is global to this definition of [pr], and must be shared by all its
- recursive calls (if any). *)
- let out = get_out fmt in
- let print_as = ref None in
- let outc c =
- match !print_as with
- | None -> pp_print_char out c
- | Some size ->
- pp_print_as_size out size (String.make 1 c);
- print_as := None
- and outs s =
- match !print_as with
- | None -> pp_print_string out s
- | Some size ->
- pp_print_as_size out size s;
- print_as := None
- and flush out = pp_print_flush out () in
-
- let rec pr k n fmt v =
-
- let len = Sformat.length fmt in
-
- let rec doprn n i =
- if i >= len then Obj.magic (k out) else
- match Sformat.get fmt i with
- | '%' ->
- Tformat.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
- | '@' ->
- let i = succ i in
- if i >= len then invalid_format fmt i else
- begin match Sformat.get fmt i with
- | '[' ->
- do_pp_open_box out n (succ i)
- | ']' ->
- pp_close_box out ();
- doprn n (succ i)
- | '{' ->
- do_pp_open_tag out n (succ i)
- | '}' ->
- pp_close_tag out ();
- doprn n (succ i)
- | ' ' ->
- pp_print_space out ();
- doprn n (succ i)
- | ',' ->
- pp_print_cut out ();
- doprn n (succ i)
- | '?' ->
- pp_print_flush out ();
- doprn n (succ i)
- | '.' ->
- pp_print_newline out ();
- doprn n (succ i)
- | '\n' ->
- pp_force_newline out ();
- doprn n (succ i)
- | ';' ->
- do_pp_break out n (succ i)
- | '<' ->
- let got_size size n i =
- print_as := Some size;
- doprn n (skip_gt i) in
- get_int n (succ i) got_size
- | '@' ->
- outc '@';
- doprn n (succ i)
- | _ -> invalid_format fmt i
- end
- | c -> outc c; doprn n (succ i)
-
- and cont_s n s i =
- outs s; doprn n i
- and cont_a n printer arg i =
- if to_s then
- outs ((Obj.magic printer : unit -> _ -> string) () arg)
- else
- printer out arg;
- doprn n i
- and cont_t n printer i =
- if to_s then
- outs ((Obj.magic printer : unit -> string) ())
- else
- printer out;
- doprn n i
- and cont_f n i =
- flush out; doprn n i
- and cont_m n xf i =
- let m =
- Sformat.add_int_index
- (Tformat.count_printing_arguments_of_format xf) n in
- pr (Obj.magic (fun _ -> doprn m i)) n xf v
-
- and get_int n i c =
- if i >= len then invalid_integer fmt i else
- match Sformat.get fmt i with
- | ' ' -> get_int n (succ i) c
- | '%' ->
- let cont_s n s i = c (format_int_of_string fmt i s) n i
- and cont_a _n _printer _arg i = invalid_integer fmt i
- and cont_t _n _printer i = invalid_integer fmt i
- and cont_f _n i = invalid_integer fmt i
- and cont_m _n _sfmt i = invalid_integer fmt i in
- Tformat.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
- | _ ->
- let rec get j =
- if j >= len then invalid_integer fmt j else
- match Sformat.get fmt j with
- | '0' .. '9' | '-' -> get (succ j)
- | _ ->
- let size =
- if j = i then size_of_int 0 else
- let s = Sformat.sub fmt (Sformat.index_of_int i) (j - i) in
- format_int_of_string fmt j s in
- c size n j in
- get i
-
- and skip_gt i =
- if i >= len then invalid_format fmt i else
- match Sformat.get fmt i with
- | ' ' -> skip_gt (succ i)
- | '>' -> succ i
- | _ -> invalid_format fmt i
-
- and get_box_kind i =
- if i >= len then Pp_box, i else
- match Sformat.get fmt i with
- | 'h' ->
- let i = succ i in
- if i >= len then Pp_hbox, i else
- begin match Sformat.get fmt i with
- | 'o' ->
- let i = succ i in
- if i >= len then format_invalid_arg "bad box format" fmt i else
- begin match Sformat.get fmt i with
- | 'v' -> Pp_hovbox, succ i
- | c ->
- format_invalid_arg
- ("bad box name ho" ^ String.make 1 c) fmt i
- end
- | 'v' -> Pp_hvbox, succ i
- | _ -> Pp_hbox, i
- end
- | 'b' -> Pp_box, succ i
- | 'v' -> Pp_vbox, succ i
- | _ -> Pp_box, i
-
- and get_tag_name n i c =
- let rec get accu n i j =
- if j >= len then
- c (implode_rev
- (Sformat.sub fmt (Sformat.index_of_int i) (j - i))
- accu)
- n j else
- match Sformat.get fmt j with
- | '>' ->
- c (implode_rev
- (Sformat.sub fmt (Sformat.index_of_int i) (j - i))
- accu)
- n j
- | '%' ->
- let s0 = Sformat.sub fmt (Sformat.index_of_int i) (j - i) in
- let cont_s n s i = get (s :: s0 :: accu) n i i
- and cont_a n printer arg i =
- let s =
- if to_s
- then (Obj.magic printer : unit -> _ -> string) () arg
- else exstring printer arg in
- get (s :: s0 :: accu) n i i
- and cont_t n printer i =
- let s =
- if to_s
- then (Obj.magic printer : unit -> string) ()
- else exstring (fun ppf () -> printer ppf) () in
- get (s :: s0 :: accu) n i i
- and cont_f _n i =
- format_invalid_arg "bad tag name specification" fmt i
- and cont_m _n _sfmt i =
- format_invalid_arg "bad tag name specification" fmt i in
- Tformat.scan_format fmt v n j cont_s cont_a cont_t cont_f cont_m
- | _ -> get accu n i (succ j) in
- get [] n i i
-
- and do_pp_break ppf n i =
- if i >= len then begin pp_print_space ppf (); doprn n i end else
- match Sformat.get fmt i with
- | '<' ->
- let rec got_nspaces nspaces n i =
- get_int n i (got_offset nspaces)
- and got_offset nspaces offset n i =
- pp_print_break ppf (int_of_size nspaces) (int_of_size offset);
- doprn n (skip_gt i) in
- get_int n (succ i) got_nspaces
- | _c -> pp_print_space ppf (); doprn n i
-
- and do_pp_open_box ppf n i =
- if i >= len then begin pp_open_box_gen ppf 0 Pp_box; doprn n i end else
- match Sformat.get fmt i with
- | '<' ->
- let kind, i = get_box_kind (succ i) in
- let got_size size n i =
- pp_open_box_gen ppf (int_of_size size) kind;
- doprn n (skip_gt i) in
- get_int n i got_size
- | _c -> pp_open_box_gen ppf 0 Pp_box; doprn n i
-
- and do_pp_open_tag ppf n i =
- if i >= len then begin pp_open_tag ppf ""; doprn n i end else
- match Sformat.get fmt i with
- | '<' ->
- let got_name tag_name n i =
- pp_open_tag ppf tag_name;
- doprn n (skip_gt i) in
- get_tag_name n (succ i) got_name
- | _c -> pp_open_tag ppf ""; doprn n i in
-
- doprn n 0 in
-
- let kpr = pr k (Sformat.index_of_int 0) in
-
- Tformat.kapr kpr fmt
-;;
+ (**************************************************************)
+
+let compute_tag output tag_acc =
+ let buf = Buffer.create 16 in
+ let ppf = formatter_of_buffer buf in
+ let () = output ppf tag_acc in
+ let () = pp_print_flush ppf () in
+ let len = Buffer.length buf in
+ if len < 2 then Buffer.contents buf
+ else Buffer.sub buf 1 (len - 2)
+
+ (**************************************************************
+
+ Defining continuations to be passed as arguments of
+ CamlinternalFormat.make_printf.
+
+ **************************************************************)
+
+open CamlinternalFormatBasics
+open CamlinternalFormat
+
+(* Interpret a formatting entity on a formatter. *)
+let output_formatting_lit ppf fmting_lit = match fmting_lit with
+ | Close_box -> pp_close_box ppf ()
+ | Close_tag -> pp_close_tag ppf ()
+ | Break (_, width, offset) -> pp_print_break ppf width offset
+ | FFlush -> pp_print_flush ppf ()
+ | Force_newline -> pp_force_newline ppf ()
+ | Flush_newline -> pp_print_newline ppf ()
+ | Magic_size (_, _) -> ()
+ | Escaped_at -> pp_print_char ppf '@'
+ | Escaped_percent -> pp_print_char ppf '%'
+ | Scan_indic c -> pp_print_char ppf '@'; pp_print_char ppf c
+
+(* Recursively output an "accumulator" containing a reversed list of
+ printing entities (string, char, flus, ...) in an output_stream. *)
+(* Differ from Printf.output_acc by the interpretation of formatting. *)
+(* Used as a continuation of CamlinternalFormat.make_printf. *)
+let rec output_acc ppf acc = match acc with
+ | Acc_string_literal (Acc_formatting_lit (p, Magic_size (_, size)), s)
+ | Acc_data_string (Acc_formatting_lit (p, Magic_size (_, size)), s) ->
+ output_acc ppf p;
+ pp_print_as_size ppf (size_of_int size) s;
+ | Acc_char_literal (Acc_formatting_lit (p, Magic_size (_, size)), c)
+ | Acc_data_char (Acc_formatting_lit (p, Magic_size (_, size)), c) ->
+ output_acc ppf p;
+ pp_print_as_size ppf (size_of_int size) (String.make 1 c);
+ | Acc_formatting_lit (p, f) ->
+ output_acc ppf p;
+ output_formatting_lit ppf f;
+ | Acc_formatting_gen (p, Acc_open_tag acc') ->
+ output_acc ppf p;
+ pp_open_tag ppf (compute_tag output_acc acc')
+ | Acc_formatting_gen (p, Acc_open_box acc') ->
+ let () = output_acc ppf p in
+ let (indent, bty) = open_box_of_string (compute_tag output_acc acc') in
+ pp_open_box_gen ppf indent bty
+ | Acc_string_literal (p, s)
+ | Acc_data_string (p, s) -> output_acc ppf p; pp_print_string ppf s;
+ | Acc_char_literal (p, c)
+ | Acc_data_char (p, c) -> output_acc ppf p; pp_print_char ppf c;
+ | Acc_delay (p, f) -> output_acc ppf p; f ppf;
+ | Acc_flush p -> output_acc ppf p; pp_print_flush ppf ();
+ | Acc_invalid_arg (p, msg) -> output_acc ppf p; invalid_arg msg;
+ | End_of_acc -> ()
+
+(* Recursively output an "accumulator" containing a reversed list of
+ printing entities (string, char, flus, ...) in a buffer. *)
+(* Differ from Printf.bufput_acc by the interpretation of formatting. *)
+(* Used as a continuation of CamlinternalFormat.make_printf. *)
+let rec strput_acc ppf acc = match acc with
+ | Acc_string_literal (Acc_formatting_lit (p, Magic_size (_, size)), s)
+ | Acc_data_string (Acc_formatting_lit (p, Magic_size (_, size)), s) ->
+ strput_acc ppf p;
+ pp_print_as_size ppf (size_of_int size) s;
+ | Acc_char_literal (Acc_formatting_lit (p, Magic_size (_, size)), c)
+ | Acc_data_char (Acc_formatting_lit (p, Magic_size (_, size)), c) ->
+ strput_acc ppf p;
+ pp_print_as_size ppf (size_of_int size) (String.make 1 c);
+ | Acc_delay (Acc_formatting_lit (p, Magic_size (_, size)), f) ->
+ strput_acc ppf p;
+ pp_print_as_size ppf (size_of_int size) (f ());
+ | Acc_formatting_lit (p, f) ->
+ strput_acc ppf p;
+ output_formatting_lit ppf f;
+ | Acc_formatting_gen (p, Acc_open_tag acc') ->
+ strput_acc ppf p;
+ pp_open_tag ppf (compute_tag strput_acc acc')
+ | Acc_formatting_gen (p, Acc_open_box acc') ->
+ let () = strput_acc ppf p in
+ let (indent, bty) = open_box_of_string (compute_tag strput_acc acc') in
+ pp_open_box_gen ppf indent bty
+ | Acc_string_literal (p, s)
+ | Acc_data_string (p, s) -> strput_acc ppf p; pp_print_string ppf s;
+ | Acc_char_literal (p, c)
+ | Acc_data_char (p, c) -> strput_acc ppf p; pp_print_char ppf c;
+ | Acc_delay (p, f) -> strput_acc ppf p; pp_print_string ppf (f ());
+ | Acc_flush p -> strput_acc ppf p; pp_print_flush ppf ();
+ | Acc_invalid_arg (p, msg) -> strput_acc ppf p; invalid_arg msg;
+ | End_of_acc -> ()
(**************************************************************
@@ -1381,30 +1165,37 @@ let mkprintf to_s get_out k fmt =
**************************************************************)
-let kfprintf k ppf = mkprintf false (fun _ -> ppf) k;;
-let ikfprintf k ppf = Tformat.kapr (fun _ _ -> Obj.magic (k ppf));;
-
-let fprintf ppf = kfprintf ignore ppf;;
-let ifprintf ppf = ikfprintf ignore ppf;;
-let printf fmt = fprintf std_formatter fmt;;
-let eprintf fmt = fprintf err_formatter fmt;;
-
-let ksprintf k =
+let kfprintf k o (Format (fmt, _)) =
+ make_printf (fun o acc -> output_acc o acc; k o) o End_of_acc fmt
+let ikfprintf k x (Format (fmt, _)) =
+ make_printf (fun _ _ -> k x) x End_of_acc fmt
+
+let fprintf ppf fmt = kfprintf ignore ppf fmt
+let ifprintf ppf fmt = ikfprintf ignore ppf fmt
+let printf fmt = fprintf std_formatter fmt
+let eprintf fmt = fprintf err_formatter fmt
+
+let ksprintf k (Format (fmt, _)) =
+ let k' () acc =
+ let b = Buffer.create 512 in
+ let ppf = formatter_of_buffer b in
+ strput_acc ppf acc;
+ pp_flush_queue ppf false;
+ k (Buffer.contents b) in
+ make_printf k' () End_of_acc fmt
+
+let sprintf fmt =
+ ksprintf (fun s -> s) fmt
+
+let asprintf (Format (fmt, _)) =
let b = Buffer.create 512 in
- let k ppf = k (string_out b ppf) in
let ppf = formatter_of_buffer b in
- let get_out _ = ppf in
- mkprintf true get_out k
-;;
-
-let sprintf fmt = ksprintf (fun s -> s) fmt;;
-
-let asprintf fmt =
- let b = Buffer.create 512 in
- let k ppf = string_out b ppf in
- let ppf = formatter_of_buffer b in
- let get_out _ = ppf in
- mkprintf false get_out k fmt;;
+ let k' : (formatter -> (formatter, unit) acc -> string)
+ = fun ppf acc ->
+ output_acc ppf acc;
+ pp_flush_queue ppf false;
+ Buffer.contents b in
+ make_printf k' ppf End_of_acc fmt
(**************************************************************
@@ -1412,15 +1203,10 @@ let asprintf fmt =
**************************************************************)
-let kbprintf k b =
- mkprintf false (fun _ -> formatter_of_buffer b) k
-;;
-
(* Deprecated error prone function bprintf. *)
-let bprintf b =
- let k ppf = pp_flush_queue ppf false in
- kbprintf k b
-;;
+let bprintf b (Format (fmt, _) : ('a, formatter, unit) format) =
+ let k ppf acc = output_acc ppf acc; pp_flush_queue ppf false in
+ make_printf k (formatter_of_buffer b) End_of_acc fmt
(* Deprecated alias for ksprintf. *)
let kprintf = ksprintf;;
diff --git a/stdlib/format.mli b/stdlib/format.mli
index e7cbe506e..b44fc0a94 100644
--- a/stdlib/format.mli
+++ b/stdlib/format.mli
@@ -714,14 +714,18 @@ val ksprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;;
(** {6 Deprecated} *)
-val bprintf : Buffer.t -> ('a, formatter, unit) format -> 'a;;
+val bprintf : Buffer.t -> ('a, formatter, unit) format -> 'a
+ [@@ocaml.deprecated]
+;;
(** @deprecated This function is error prone. Do not use it.
If you need to print to some buffer [b], you must first define a
formatter writing to [b], using [let to_b = formatter_of_buffer b]; then
use regular calls to [Format.fprintf] on formatter [to_b]. *)
-val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;;
+val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b
+ [@@ocaml.deprecated]
+;;
(** @deprecated An alias for [ksprintf]. *)
val set_all_formatter_output_functions :
@@ -730,6 +734,7 @@ val set_all_formatter_output_functions :
newline:(unit -> unit) ->
spaces:(int -> unit) ->
unit
+[@@ocaml.deprecated]
;;
(** @deprecated Subsumed by [set_formatter_out_functions].
*)
@@ -740,12 +745,14 @@ val get_all_formatter_output_functions :
(unit -> unit) *
(unit -> unit) *
(int -> unit)
+[@@ocaml.deprecated]
;;
(** @deprecated Subsumed by [get_formatter_out_functions].
*)
val pp_set_all_formatter_output_functions :
formatter -> out:(string -> int -> int -> unit) -> flush:(unit -> unit) ->
newline:(unit -> unit) -> spaces:(int -> unit) -> unit
+[@@ocaml.deprecated]
;;
(** @deprecated Subsumed by [pp_set_formatter_out_functions].
*)
@@ -754,6 +761,7 @@ val pp_get_all_formatter_output_functions :
formatter -> unit ->
(string -> int -> int -> unit) * (unit -> unit) * (unit -> unit) *
(int -> unit)
+[@@ocaml.deprecated]
;;
(** @deprecated Subsumed by [pp_get_formatter_out_functions].
*)
diff --git a/stdlib/gc.mli b/stdlib/gc.mli
index d2f2d9761..f86a1e687 100644
--- a/stdlib/gc.mli
+++ b/stdlib/gc.mli
@@ -83,7 +83,7 @@ type stat =
type control =
{ mutable minor_heap_size : int;
(** The size (in words) of the minor heap. Changing
- this parameter will trigger a minor collection. Default: 32k. *)
+ this parameter will trigger a minor collection. Default: 256k. *)
mutable major_heap_increment : int;
(** How much to add to the major heap when increasing it. If this
@@ -131,7 +131,7 @@ type control =
mutable stack_limit : int;
(** The maximum size of the stack (in words). This is only
relevant to the byte-code runtime, as the native code runtime
- uses the operating system's stack. Default: 256k. *)
+ uses the operating system's stack. Default: 1024k. *)
mutable allocation_policy : int;
(** The policy used for allocating in the heap. Possible
@@ -215,6 +215,9 @@ val finalise : ('a -> unit) -> 'a -> unit
before the values it depends upon. Of course, this becomes
false if additional dependencies are introduced by assignments.
+ In the presence of multiple OCaml threads it should be assumed that
+ any particular finaliser may be executed in any of the threads.
+
Anything reachable from the closure of finalisation functions
is considered reachable, so the following code will not work
as expected:
@@ -249,7 +252,7 @@ val finalise : ('a -> unit) -> 'a -> unit
another copy is still in use by the program.
- The results of calling {!String.make}, {!String.create},
+ The results of calling {!String.make}, {!Bytes.make}, {!Bytes.create},
{!Array.make}, and {!Pervasives.ref} are guaranteed to be
heap-allocated and non-constant except when the length argument is [0].
*)
diff --git a/stdlib/genlex.ml b/stdlib/genlex.ml
index dc80727df..4f52b1865 100644
--- a/stdlib/genlex.ml
+++ b/stdlib/genlex.ml
@@ -21,7 +21,7 @@ type token =
(* The string buffering machinery *)
-let initial_buffer = String.create 32
+let initial_buffer = Bytes.create 32
let buffer = ref initial_buffer
let bufpos = ref 0
@@ -29,16 +29,16 @@ let bufpos = ref 0
let reset_buffer () = buffer := initial_buffer; bufpos := 0
let store c =
- if !bufpos >= String.length !buffer then
- begin
- let newbuffer = String.create (2 * !bufpos) in
- String.blit !buffer 0 newbuffer 0 !bufpos; buffer := newbuffer
- end;
- String.set !buffer !bufpos c;
+ if !bufpos >= Bytes.length !buffer then begin
+ let newbuffer = Bytes.create (2 * !bufpos) in
+ Bytes.blit !buffer 0 newbuffer 0 !bufpos;
+ buffer := newbuffer
+ end;
+ Bytes.set !buffer !bufpos c;
incr bufpos
let get_string () =
- let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s
+ let s = Bytes.sub_string !buffer 0 !bufpos in buffer := initial_buffer; s
(* The lexer *)
diff --git a/stdlib/hashtbl.mli b/stdlib/hashtbl.mli
index 5424df40d..0c3e4999f 100644
--- a/stdlib/hashtbl.mli
+++ b/stdlib/hashtbl.mli
@@ -187,6 +187,34 @@ val stats : ('a, 'b) t -> statistics
(** {6 Functorial interface} *)
+(** The functorial interface allows the use of specific comparison
+ and hash functions, either for performance/security concerns,
+ or because keys are not hashable/comparable with the polymorphic builtins.
+
+ For instance, one might want to specialize a table for integer keys:
+ {[
+ module IntHash =
+ struct
+ type t = int
+ let equal i j = i=j
+ let hash i = i land max_int
+ end
+
+ module IntHashtbl = Hashtbl.Make(IntHash)
+
+ let h = IntHashtbl.create 17 in
+ IntHashtbl.add h 12 "hello";;
+ ]}
+
+ This creates a new module [IntHashtbl], with a new type ['a
+ IntHashtbl.t] of tables from [int] to ['a]. In this example, [h]
+ contains [string] values so its type is [string IntHashtbl.t].
+
+ Note that the new type ['a IntHashtbl.t] is not compatible with
+ the type [('a,'b) Hashtbl.t] of the generic interface. For
+ example, [Hashtbl.length h] would not type-check, you must use
+ [IntHashtbl.length].
+*)
module type HashedType =
sig
diff --git a/stdlib/header.c b/stdlib/header.c
index cb3d9953a..93cdfeb2d 100644
--- a/stdlib/header.c
+++ b/stdlib/header.c
@@ -133,7 +133,7 @@ static char * read_runtime_path(int fd)
char buffer[TRAILER_SIZE];
static char runtime_path[MAXPATHLEN];
int num_sections, i;
- uint32 path_size;
+ uint32_t path_size;
long ofs;
lseek(fd, (long) -TRAILER_SIZE, SEEK_END);
diff --git a/stdlib/lazy.mli b/stdlib/lazy.mli
index 6108a715c..6ade2e3d4 100644
--- a/stdlib/lazy.mli
+++ b/stdlib/lazy.mli
@@ -75,11 +75,11 @@ val is_val : 'a t -> bool;;
did not raise an exception.
@since 4.00.0 *)
-val lazy_from_fun : (unit -> 'a) -> 'a t;;
+val lazy_from_fun : (unit -> 'a) -> 'a t [@@ocaml.deprecated];;
(** @deprecated synonym for [from_fun]. *)
-val lazy_from_val : 'a -> 'a t;;
+val lazy_from_val : 'a -> 'a t [@@ocaml.deprecated];;
(** @deprecated synonym for [from_val]. *)
-val lazy_is_val : 'a t -> bool;;
+val lazy_is_val : 'a t -> bool [@@ocaml.deprecated];;
(** @deprecated synonym for [is_val]. *)
diff --git a/stdlib/lexing.ml b/stdlib/lexing.ml
index 53748ad86..d2231fbe9 100644
--- a/stdlib/lexing.ml
+++ b/stdlib/lexing.ml
@@ -29,7 +29,7 @@ let dummy_pos = {
type lexbuf =
{ refill_buff : lexbuf -> unit;
- mutable lex_buffer : string;
+ mutable lex_buffer : bytes;
mutable lex_buffer_len : int;
mutable lex_abs_pos : int;
mutable lex_start_pos : int;
@@ -81,7 +81,7 @@ let new_engine tbl state buf =
let lex_refill read_fun aux_buffer lexbuf =
let read =
- read_fun aux_buffer (String.length aux_buffer) in
+ read_fun aux_buffer (Bytes.length aux_buffer) in
let n =
if read > 0
then read
@@ -90,16 +90,16 @@ let lex_refill read_fun aux_buffer lexbuf =
<-------|---------------------|----------->
| junk | valid data | junk |
^ ^ ^ ^
- 0 start_pos buffer_end String.length buffer
+ 0 start_pos buffer_end Bytes.length buffer
*)
- if lexbuf.lex_buffer_len + n > String.length lexbuf.lex_buffer then begin
+ if lexbuf.lex_buffer_len + n > Bytes.length lexbuf.lex_buffer then begin
(* There is not enough space at the end of the buffer *)
if lexbuf.lex_buffer_len - lexbuf.lex_start_pos + n
- <= String.length lexbuf.lex_buffer
+ <= Bytes.length lexbuf.lex_buffer
then begin
(* But there is enough space if we reclaim the junk at the beginning
of the buffer *)
- String.blit lexbuf.lex_buffer lexbuf.lex_start_pos
+ Bytes.blit lexbuf.lex_buffer lexbuf.lex_start_pos
lexbuf.lex_buffer 0
(lexbuf.lex_buffer_len - lexbuf.lex_start_pos)
end else begin
@@ -107,12 +107,12 @@ let lex_refill read_fun aux_buffer lexbuf =
space since n <= String.length aux_buffer <= String.length buffer.
Watch out for string length overflow, though. *)
let newlen =
- min (2 * String.length lexbuf.lex_buffer) Sys.max_string_length in
+ min (2 * Bytes.length lexbuf.lex_buffer) Sys.max_string_length in
if lexbuf.lex_buffer_len - lexbuf.lex_start_pos + n > newlen
then failwith "Lexing.lex_refill: cannot grow buffer";
- let newbuf = String.create newlen in
+ let newbuf = Bytes.create newlen in
(* Copy the valid data to the beginning of the new buffer *)
- String.blit lexbuf.lex_buffer lexbuf.lex_start_pos
+ Bytes.blit lexbuf.lex_buffer lexbuf.lex_start_pos
newbuf 0
(lexbuf.lex_buffer_len - lexbuf.lex_start_pos);
lexbuf.lex_buffer <- newbuf
@@ -133,9 +133,7 @@ let lex_refill read_fun aux_buffer lexbuf =
done
end;
(* There is now enough space at the end of the buffer *)
- String.blit aux_buffer 0
- lexbuf.lex_buffer lexbuf.lex_buffer_len
- n;
+ Bytes.blit aux_buffer 0 lexbuf.lex_buffer lexbuf.lex_buffer_len n;
lexbuf.lex_buffer_len <- lexbuf.lex_buffer_len + n
let zero_pos = {
@@ -146,8 +144,8 @@ let zero_pos = {
};;
let from_function f =
- { refill_buff = lex_refill f (String.create 512);
- lex_buffer = String.create 1024;
+ { refill_buff = lex_refill f (Bytes.create 512);
+ lex_buffer = Bytes.create 1024;
lex_buffer_len = 0;
lex_abs_pos = 0;
lex_start_pos = 0;
@@ -165,7 +163,8 @@ let from_channel ic =
let from_string s =
{ refill_buff = (fun lexbuf -> lexbuf.lex_eof_reached <- true);
- lex_buffer = s ^ "";
+ lex_buffer = Bytes.of_string s; (* have to make a copy for compatibility
+ with unsafe-string mode *)
lex_buffer_len = String.length s;
lex_abs_pos = 0;
lex_start_pos = 0;
@@ -180,37 +179,31 @@ let from_string s =
let lexeme lexbuf =
let len = lexbuf.lex_curr_pos - lexbuf.lex_start_pos in
- let s = String.create len in
- String.unsafe_blit lexbuf.lex_buffer lexbuf.lex_start_pos s 0 len;
- s
+ Bytes.sub_string lexbuf.lex_buffer lexbuf.lex_start_pos len
let sub_lexeme lexbuf i1 i2 =
let len = i2-i1 in
- let s = String.create len in
- String.unsafe_blit lexbuf.lex_buffer i1 s 0 len;
- s
+ Bytes.sub_string lexbuf.lex_buffer i1 len
let sub_lexeme_opt lexbuf i1 i2 =
if i1 >= 0 then begin
let len = i2-i1 in
- let s = String.create len in
- String.unsafe_blit lexbuf.lex_buffer i1 s 0 len;
- Some s
+ Some (Bytes.sub_string lexbuf.lex_buffer i1 len)
end else begin
None
end
-let sub_lexeme_char lexbuf i = lexbuf.lex_buffer.[i]
+let sub_lexeme_char lexbuf i = Bytes.get lexbuf.lex_buffer i
let sub_lexeme_char_opt lexbuf i =
if i >= 0 then
- Some lexbuf.lex_buffer.[i]
+ Some (Bytes.get lexbuf.lex_buffer i)
else
None
let lexeme_char lexbuf i =
- String.get lexbuf.lex_buffer (lexbuf.lex_start_pos + i)
+ Bytes.get lexbuf.lex_buffer (lexbuf.lex_start_pos + i)
let lexeme_start lexbuf = lexbuf.lex_start_p.pos_cnum;;
let lexeme_end lexbuf = lexbuf.lex_curr_p.pos_cnum;;
diff --git a/stdlib/lexing.mli b/stdlib/lexing.mli
index 6d5406d69..30898670b 100644
--- a/stdlib/lexing.mli
+++ b/stdlib/lexing.mli
@@ -46,7 +46,7 @@ val dummy_pos : position;;
type lexbuf =
{ refill_buff : lexbuf -> unit;
- mutable lex_buffer : string;
+ mutable lex_buffer : bytes;
mutable lex_buffer_len : int;
mutable lex_abs_pos : int;
mutable lex_start_pos : int;
@@ -84,12 +84,12 @@ val from_string : string -> lexbuf
the string. An end-of-input condition is generated when the
end of the string is reached. *)
-val from_function : (string -> int -> int) -> lexbuf
+val from_function : (bytes -> int -> int) -> lexbuf
(** Create a lexer buffer with the given function as its reading method.
When the scanner needs more characters, it will call the given
- function, giving it a character string [s] and a character
- count [n]. The function should put [n] characters or less in [s],
- starting at character number 0, and return the number of characters
+ function, giving it a byte sequence [s] and a byte
+ count [n]. The function should put [n] bytes or fewer in [s],
+ starting at index 0, and return the number of bytes
provided. A return value of 0 means end of input. *)
diff --git a/stdlib/list.mli b/stdlib/list.mli
index 5b88f229d..b53a63c64 100644
--- a/stdlib/list.mli
+++ b/stdlib/list.mli
@@ -112,14 +112,14 @@ val fold_right : ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b
val iter2 : ('a -> 'b -> unit) -> 'a list -> 'b list -> unit
(** [List.iter2 f [a1; ...; an] [b1; ...; bn]] calls in turn
[f a1 b1; ...; f an bn].
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list
(** [List.map2 f [a1; ...; an] [b1; ...; bn]] is
[[f a1 b1; ...; f an bn]].
- Raise [Invalid_argument] if the two lists have
- different lengths. Not tail-recursive. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. Not tail-recursive. *)
val rev_map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list
(** [List.rev_map2 f l1 l2] gives the same result as
@@ -129,14 +129,14 @@ val rev_map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list
val fold_left2 : ('a -> 'b -> 'c -> 'a) -> 'a -> 'b list -> 'c list -> 'a
(** [List.fold_left2 f a [b1; ...; bn] [c1; ...; cn]] is
[f (... (f (f a b1 c1) b2 c2) ...) bn cn].
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val fold_right2 : ('a -> 'b -> 'c -> 'c) -> 'a list -> 'b list -> 'c -> 'c
(** [List.fold_right2 f [a1; ...; an] [b1; ...; bn] c] is
[f a1 b1 (f a2 b2 (... (f an bn c) ...))].
- Raise [Invalid_argument] if the two lists have
- different lengths. Not tail-recursive. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. Not tail-recursive. *)
(** {6 List scanning} *)
@@ -154,13 +154,13 @@ val exists : ('a -> bool) -> 'a list -> bool
val for_all2 : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool
(** Same as {!List.for_all}, but for a two-argument predicate.
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val exists2 : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool
(** Same as {!List.exists}, but for a two-argument predicate.
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val mem : 'a -> 'a list -> bool
(** [mem a l] is true if and only if [a] is equal
diff --git a/stdlib/listLabels.mli b/stdlib/listLabels.mli
index 8cf651471..45e3c41ea 100644
--- a/stdlib/listLabels.mli
+++ b/stdlib/listLabels.mli
@@ -112,14 +112,14 @@ val fold_right : f:('a -> 'b -> 'b) -> 'a list -> init:'b -> 'b
val iter2 : f:('a -> 'b -> unit) -> 'a list -> 'b list -> unit
(** [List.iter2 f [a1; ...; an] [b1; ...; bn]] calls in turn
[f a1 b1; ...; f an bn].
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val map2 : f:('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list
(** [List.map2 f [a1; ...; an] [b1; ...; bn]] is
[[f a1 b1; ...; f an bn]].
- Raise [Invalid_argument] if the two lists have
- different lengths. Not tail-recursive. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. Not tail-recursive. *)
val rev_map2 : f:('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list
(** [List.rev_map2 f l1 l2] gives the same result as
@@ -130,15 +130,15 @@ val fold_left2 :
f:('a -> 'b -> 'c -> 'a) -> init:'a -> 'b list -> 'c list -> 'a
(** [List.fold_left2 f a [b1; ...; bn] [c1; ...; cn]] is
[f (... (f (f a b1 c1) b2 c2) ...) bn cn].
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val fold_right2 :
f:('a -> 'b -> 'c -> 'c) -> 'a list -> 'b list -> init:'c -> 'c
(** [List.fold_right2 f [a1; ...; an] [b1; ...; bn] c] is
[f a1 b1 (f a2 b2 (... (f an bn c) ...))].
- Raise [Invalid_argument] if the two lists have
- different lengths. Not tail-recursive. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. Not tail-recursive. *)
(** {6 List scanning} *)
@@ -156,13 +156,13 @@ val exists : f:('a -> bool) -> 'a list -> bool
val for_all2 : f:('a -> 'b -> bool) -> 'a list -> 'b list -> bool
(** Same as {!ListLabels.for_all}, but for a two-argument predicate.
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val exists2 : f:('a -> 'b -> bool) -> 'a list -> 'b list -> bool
(** Same as {!ListLabels.exists}, but for a two-argument predicate.
- Raise [Invalid_argument] if the two lists have
- different lengths. *)
+ Raise [Invalid_argument] if the two lists are determined
+ to have different lengths. *)
val mem : 'a -> set:'a list -> bool
(** [mem a l] is true if and only if [a] is equal
diff --git a/stdlib/map.mli b/stdlib/map.mli
index 6dd371b52..abf453161 100644
--- a/stdlib/map.mli
+++ b/stdlib/map.mli
@@ -19,6 +19,26 @@
All operations over maps are purely applicative (no side-effects).
The implementation uses balanced binary trees, and therefore searching
and insertion take time logarithmic in the size of the map.
+
+ For instance:
+ {[
+ module IntPairs =
+ struct
+ type t = int * int
+ let compare (x0,y0) (x1,y1) =
+ match Pervasives.compare x0 x1 with
+ 0 -> Pervasives.compare y0 y1
+ | c -> c
+ end
+
+ module PairsMap = Map.Make(IntPairs)
+
+ let m = PairsMap.(empty |> add (0,1) "hello" |> add (1,0) "world")
+ ]}
+
+ This creates a new module [PairsMap], with a new type ['a PairsMap.t]
+ of maps from [int * int] to ['a]. In this example, [m] contains [string]
+ values so its type is [string PairsMap.t].
*)
module type OrderedType =
diff --git a/stdlib/marshal.ml b/stdlib/marshal.ml
index 7a65a16a3..415559571 100644
--- a/stdlib/marshal.ml
+++ b/stdlib/marshal.ml
@@ -19,35 +19,48 @@ type extern_flags =
external to_channel: out_channel -> 'a -> extern_flags list -> unit
= "caml_output_value"
+external to_bytes: 'a -> extern_flags list -> bytes
+ = "caml_output_value_to_string"
external to_string: 'a -> extern_flags list -> string
= "caml_output_value_to_string"
external to_buffer_unsafe:
- string -> int -> int -> 'a -> extern_flags list -> int
+ bytes -> int -> int -> 'a -> extern_flags list -> int
= "caml_output_value_to_buffer"
let to_buffer buff ofs len v flags =
- if ofs < 0 || len < 0 || ofs > String.length buff - len
+ if ofs < 0 || len < 0 || ofs > Bytes.length buff - len
then invalid_arg "Marshal.to_buffer: substring out of bounds"
else to_buffer_unsafe buff ofs len v flags
+(* The functions below use byte sequences as input, never using any
+ mutation. It makes sense to use non-mutated [bytes] rather than
+ [string], because we really work with sequences of bytes, not
+ a text representation.
+*)
+
external from_channel: in_channel -> 'a = "caml_input_value"
-external from_string_unsafe: string -> int -> 'a
+external from_bytes_unsafe: bytes -> int -> 'a
= "caml_input_value_from_string"
-external data_size_unsafe: string -> int -> int = "caml_marshal_data_size"
+external data_size_unsafe: bytes -> int -> int = "caml_marshal_data_size"
let header_size = 20
let data_size buff ofs =
- if ofs < 0 || ofs > String.length buff - header_size
+ if ofs < 0 || ofs > Bytes.length buff - header_size
then invalid_arg "Marshal.data_size"
else data_size_unsafe buff ofs
let total_size buff ofs = header_size + data_size buff ofs
-let from_string buff ofs =
- if ofs < 0 || ofs > String.length buff - header_size
- then invalid_arg "Marshal.from_size"
+let from_bytes buff ofs =
+ if ofs < 0 || ofs > Bytes.length buff - header_size
+ then invalid_arg "Marshal.from_bytes"
else begin
let len = data_size_unsafe buff ofs in
- if ofs > String.length buff - (header_size + len)
- then invalid_arg "Marshal.from_string"
- else from_string_unsafe buff ofs
+ if ofs > Bytes.length buff - (header_size + len)
+ then invalid_arg "Marshal.from_bytes"
+ else from_bytes_unsafe buff ofs
end
+
+let from_string buff ofs =
+ (* Bytes.unsafe_of_string is safe here, as the produced byte
+ sequence is never mutated *)
+ from_bytes (Bytes.unsafe_of_string buff) ofs
diff --git a/stdlib/marshal.mli b/stdlib/marshal.mli
index f12af9fd9..9dfdd1624 100644
--- a/stdlib/marshal.mli
+++ b/stdlib/marshal.mli
@@ -33,12 +33,13 @@
Anything can happen at run-time if the object in the file does not
belong to the given type.
- OCaml exception values (of type [exn]) returned by the unmarhsaller
- should not be pattern-matched over through [match ... with] or [try
- ... with], because unmarshalling does not preserve the information
- required for matching their exception constructor. Structural
- equalities with other exception values, or most other uses such as
- Printexc.to_string, will still work as expected.
+ Values of extensible variant types, for example exceptions (of
+ extensible type [exn]), returned by the unmarhsaller should not be
+ pattern-matched over through [match ... with] or [try ... with],
+ because unmarshalling does not preserve the information required for
+ matching their constructors. Structural equalities with other
+ extensible variant values does not work either. Most other uses such
+ as Printexc.to_string, will still work as expected.
The representation of marshaled values is not human-readable,
and uses bytes that are not printable characters. Therefore,
@@ -47,7 +48,7 @@
[open_out_bin] or [open_in_bin]; channels opened in text mode will
cause unmarshaling errors on platforms where text channels behave
differently than binary channels, e.g. Windows.
-*)
+ *)
type extern_flags =
No_sharing (** Don't preserve sharing *)
@@ -74,17 +75,26 @@ val to_channel : out_channel -> 'a -> extern_flags list -> unit
byte representations if [v] actually contains sharing,
or even non-termination if [v] contains cycles.
- If [flags] does not contain [Marshal.Closures],
- marshaling fails when it encounters a functional value
- inside [v]: only 'pure' data structures, containing neither
- functions nor objects, can safely be transmitted between
- different programs. If [flags] contains [Marshal.Closures],
- functional values will be marshaled as a position in the code
- of the program. In this case, the output of marshaling can
- only be read back in processes that run exactly the same program,
- with exactly the same compiled code. (This is checked
- at un-marshaling time, using an MD5 digest of the code
- transmitted along with the code position.)
+ If [flags] does not contain [Marshal.Closures], marshaling fails
+ when it encounters a functional value inside [v]: only 'pure' data
+ structures, containing neither functions nor objects, can safely be
+ transmitted between different programs. If [flags] contains
+ [Marshal.Closures], functional values will be marshaled as a the
+ position in the code of the program together with the values
+ corresponding to the free variables captured in the closure. In
+ this case, the output of marshaling can only be read back in
+ processes that run exactly the same program, with exactly the same
+ compiled code. (This is checked at un-marshaling time, using an MD5
+ digest of the code transmitted along with the code position.)
+
+ The exact definition of which free variables are captured in a
+ closure is not specified and can very between bytecode and native
+ code (and according to optimization flags). In particular, a
+ function value accessing a global reference may or may not include
+ the reference in its closure. If it does, unmarshaling the
+ corresponding closure will create a new reference, different from
+ the global one.
+
If [flags] contains [Marshal.Compat_32], marshaling fails when
it encounters an integer value outside the range [[-2{^30}, 2{^30}-1]]
@@ -99,19 +109,24 @@ val to_channel : out_channel -> 'a -> extern_flags list -> unit
it has no effect if marshaling is performed on a 32-bit platform.
*)
-external to_string :
- 'a -> extern_flags list -> string = "caml_output_value_to_string"
-(** [Marshal.to_string v flags] returns a string containing
- the representation of [v] as a sequence of bytes.
+external to_bytes :
+ 'a -> extern_flags list -> bytes = "caml_output_value_to_string"
+(** [Marshal.to_bytes v flags] returns a byte sequence containing
+ the representation of [v].
The [flags] argument has the same meaning as for
{!Marshal.to_channel}. *)
-val to_buffer : string -> int -> int -> 'a -> extern_flags list -> int
+external to_string :
+ 'a -> extern_flags list -> string = "caml_output_value_to_string"
+(** Same as [to_bytes] but return the result as a string instead of
+ a byte sequence. *)
+
+val to_buffer : bytes -> int -> int -> 'a -> extern_flags list -> int
(** [Marshal.to_buffer buff ofs len v flags] marshals the value [v],
- storing its byte representation in the string [buff],
- starting at character number [ofs], and writing at most
- [len] characters. It returns the number of characters
- actually written to the string. If the byte representation
+ storing its byte representation in the sequence [buff],
+ starting at index [ofs], and writing at most
+ [len] bytes. It returns the number of bytes
+ actually written to the sequence. If the byte representation
of [v] does not fit in [len] characters, the exception [Failure]
is raised. *)
@@ -121,36 +136,41 @@ val from_channel : in_channel -> 'a
one of the [Marshal.to_*] functions, and reconstructs and
returns the corresponding value.*)
-val from_string : string -> int -> 'a
-(** [Marshal.from_string buff ofs] unmarshals a structured value
+val from_bytes : bytes -> int -> 'a
+(** [Marshal.from_bytes buff ofs] unmarshals a structured value
like {!Marshal.from_channel} does, except that the byte
representation is not read from a channel, but taken from
- the string [buff], starting at position [ofs]. *)
+ the byte sequence [buff], starting at position [ofs].
+ The byte sequence is not mutated. *)
+
+val from_string : string -> int -> 'a
+(** Same as [from_bytes] but take a string as argument instead of a
+ byte sequence. *)
val header_size : int
(** The bytes representing a marshaled value are composed of
a fixed-size header and a variable-sized data part,
whose size can be determined from the header.
- {!Marshal.header_size} is the size, in characters, of the header.
- {!Marshal.data_size}[ buff ofs] is the size, in characters,
+ {!Marshal.header_size} is the size, in bytes, of the header.
+ {!Marshal.data_size}[ buff ofs] is the size, in bytes,
of the data part, assuming a valid header is stored in
[buff] starting at position [ofs].
Finally, {!Marshal.total_size} [buff ofs] is the total size,
- in characters, of the marshaled value.
+ in bytes, of the marshaled value.
Both {!Marshal.data_size} and {!Marshal.total_size} raise [Failure]
if [buff], [ofs] does not contain a valid header.
To read the byte representation of a marshaled value into
- a string buffer, the program needs to read first
- {!Marshal.header_size} characters into the buffer,
+ a byte sequence, the program needs to read first
+ {!Marshal.header_size} bytes into the sequence,
then determine the length of the remainder of the
representation using {!Marshal.data_size},
- make sure the buffer is large enough to hold the remaining
- data, then read it, and finally call {!Marshal.from_string}
+ make sure the sequence is large enough to hold the remaining
+ data, then read it, and finally call {!Marshal.from_bytes}
to unmarshal the value. *)
-val data_size : string -> int -> int
+val data_size : bytes -> int -> int
(** See {!Marshal.header_size}.*)
-val total_size : string -> int -> int
+val total_size : bytes -> int -> int
(** See {!Marshal.header_size}.*)
diff --git a/stdlib/nativeint.mli b/stdlib/nativeint.mli
index eb2dde2cf..3dce1b6c4 100644
--- a/stdlib/nativeint.mli
+++ b/stdlib/nativeint.mli
@@ -16,8 +16,8 @@
This module provides operations on the type [nativeint] of
signed 32-bit integers (on 32-bit platforms) or
signed 64-bit integers (on 64-bit platforms).
- This integer type has exactly the same width as that of a [long]
- integer type in the C compiler. All arithmetic operations over
+ This integer type has exactly the same width as that of a
+ pointer type in the C compiler. All arithmetic operations over
[nativeint] are taken modulo 2{^32} or 2{^64} depending
on the word size of the architecture.
diff --git a/stdlib/obj.ml b/stdlib/obj.ml
index a6f11586e..ac9695cdb 100644
--- a/stdlib/obj.ml
+++ b/stdlib/obj.ml
@@ -33,9 +33,9 @@ external truncate : t -> int -> unit = "caml_obj_truncate"
external add_offset : t -> Int32.t -> t = "caml_obj_add_offset"
let marshal (obj : t) =
- Marshal.to_string obj []
+ Marshal.to_bytes obj []
let unmarshal str pos =
- (Marshal.from_string str pos, pos + Marshal.total_size str pos)
+ (Marshal.from_bytes str pos, pos + Marshal.total_size str pos)
let lazy_tag = 246
let closure_tag = 247
@@ -56,3 +56,33 @@ let final_tag = custom_tag
let int_tag = 1000
let out_of_heap_tag = 1001
let unaligned_tag = 1002
+
+let extension_slot x =
+ let x = repr x in
+ let slot =
+ if (is_block x) && (tag x) <> object_tag && (size x) >= 1 then field x 0
+ else x
+ in
+ let name =
+ if (is_block slot) && (tag slot) = object_tag then field slot 0
+ else raise Not_found
+ in
+ if (tag name) = string_tag then slot
+ else raise Not_found
+
+let extension_name x =
+ try
+ let slot = extension_slot x in
+ (obj (field slot 0) : string)
+ with Not_found -> invalid_arg "Obj.extension_name"
+
+let extension_id x =
+ try
+ let slot = extension_slot x in
+ (obj (field slot 1) : int)
+ with Not_found -> invalid_arg "Obj.extension_id"
+
+let extension_slot x =
+ try
+ extension_slot x
+ with Not_found -> invalid_arg "Obj.extension_slot"
diff --git a/stdlib/obj.mli b/stdlib/obj.mli
index 9a5bd721d..08b8a4f64 100644
--- a/stdlib/obj.mli
+++ b/stdlib/obj.mli
@@ -43,18 +43,22 @@ val infix_tag : int
val forward_tag : int
val no_scan_tag : int
val abstract_tag : int
-val string_tag : int
+val string_tag : int (* both [string] and [bytes] *)
val double_tag : int
val double_array_tag : int
val custom_tag : int
-val final_tag : int (* DEPRECATED *)
+val final_tag : int [@@ocaml.deprecated]
val int_tag : int
val out_of_heap_tag : int
val unaligned_tag : int (* should never happen @since 3.11.0 *)
+val extension_name : 'a -> string
+val extension_id : 'a -> int
+val extension_slot : 'a -> t
+
(** The following two functions are deprecated. Use module {!Marshal}
instead. *)
-val marshal : t -> string
-val unmarshal : string -> int -> t * int
+val marshal : t -> bytes [@@ocaml.deprecated]
+val unmarshal : bytes -> int -> t * int [@@ocaml.deprecated]
diff --git a/stdlib/parsing.ml b/stdlib/parsing.ml
index 762128244..47e151e1b 100644
--- a/stdlib/parsing.ml
+++ b/stdlib/parsing.ml
@@ -84,10 +84,10 @@ external set_trace: bool -> bool
= "caml_set_parser_trace"
let env =
- { s_stack = Array.create 100 0;
- v_stack = Array.create 100 (Obj.repr ());
- symb_start_stack = Array.create 100 dummy_pos;
- symb_end_stack = Array.create 100 dummy_pos;
+ { s_stack = Array.make 100 0;
+ v_stack = Array.make 100 (Obj.repr ());
+ symb_start_stack = Array.make 100 dummy_pos;
+ symb_end_stack = Array.make 100 dummy_pos;
stacksize = 100;
stackbase = 0;
curr_char = 0;
@@ -104,10 +104,10 @@ let env =
let grow_stacks() =
let oldsize = env.stacksize in
let newsize = oldsize * 2 in
- let new_s = Array.create newsize 0
- and new_v = Array.create newsize (Obj.repr ())
- and new_start = Array.create newsize dummy_pos
- and new_end = Array.create newsize dummy_pos in
+ let new_s = Array.make newsize 0
+ and new_v = Array.make newsize (Obj.repr ())
+ and new_start = Array.make newsize dummy_pos
+ and new_end = Array.make newsize dummy_pos in
Array.blit env.s_stack 0 new_s 0 oldsize;
env.s_stack <- new_s;
Array.blit env.v_stack 0 new_v 0 oldsize;
diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml
index e4a07c3de..6b7165206 100644
--- a/stdlib/pervasives.ml
+++ b/stdlib/pervasives.ml
@@ -37,6 +37,18 @@ exception Exit
external ( |> ) : 'a -> ('a -> 'b) -> 'b = "%revapply"
external ( @@ ) : ('a -> 'b) -> 'a -> 'b = "%apply"
+(* Debugging *)
+
+external __LOC__ : string = "%loc_LOC"
+external __FILE__ : string = "%loc_FILE"
+external __LINE__ : int = "%loc_LINE"
+external __MODULE__ : string = "%loc_MODULE"
+external __POS__ : string * int * int * int = "%loc_POS"
+
+external __LOC_OF__ : 'a -> string * 'a = "%loc_LOC"
+external __LINE_OF__ : 'a -> int * 'a = "%loc_LINE"
+external __POS_OF__ : 'a -> (string * int * int * int) * 'a = "%loc_POS"
+
(* Comparisons *)
external ( = ) : 'a -> 'a -> bool = "%equal"
@@ -69,7 +81,7 @@ external succ : int -> int = "%succint"
external pred : int -> int = "%predint"
external ( + ) : int -> int -> int = "%addint"
external ( - ) : int -> int -> int = "%subint"
-external ( * ) : int -> int -> int = "%mulint"
+external ( * ) : int -> int -> int = "%mulint"
external ( / ) : int -> int -> int = "%divint"
external ( mod ) : int -> int -> int = "%modint"
@@ -150,19 +162,24 @@ type fpclass =
| FP_nan
external classify_float : float -> fpclass = "caml_classify_float"
-(* String operations -- more in module String *)
+(* String and byte sequence operations -- more in modules String and Bytes *)
external string_length : string -> int = "%string_length"
-external string_create : int -> string = "caml_create_string"
-external string_blit : string -> int -> string -> int -> int -> unit
+external bytes_length : bytes -> int = "%string_length"
+external bytes_create : int -> bytes = "caml_create_string"
+external string_blit : string -> int -> bytes -> int -> int -> unit
= "caml_blit_string" "noalloc"
+external bytes_blit : bytes -> int -> bytes -> int -> int -> unit
+ = "caml_blit_string" "noalloc"
+external bytes_unsafe_to_string : bytes -> string = "%identity"
+external bytes_unsafe_of_string : string -> bytes = "%identity"
let ( ^ ) s1 s2 =
let l1 = string_length s1 and l2 = string_length s2 in
- let s = string_create (l1 + l2) in
+ let s = bytes_create (l1 + l2) in
string_blit s1 0 s 0 l1;
string_blit s2 0 s l1 l2;
- s
+ bytes_unsafe_to_string s
(* Character operations -- more in module Char *)
@@ -180,6 +197,15 @@ external ignore : 'a -> unit = "%ignore"
external fst : 'a * 'b -> 'a = "%field0"
external snd : 'a * 'b -> 'b = "%field1"
+(* References *)
+
+type 'a ref = { mutable contents : 'a }
+external ref : 'a -> 'a ref = "%makemutable"
+external ( ! ) : 'a ref -> 'a = "%field0"
+external ( := ) : 'a ref -> 'a -> unit = "%setfield0"
+external incr : int ref -> unit = "%incr"
+external decr : int ref -> unit = "%decr"
+
(* String conversion functions *)
external format_int : string -> int -> string = "caml_format_int"
@@ -196,16 +222,13 @@ let string_of_int n =
format_int "%d" n
external int_of_string : string -> int = "caml_int_of_string"
-
-module String = struct
- external get : string -> int -> char = "%string_safe_get"
-end
+external string_get : string -> int -> char = "%string_safe_get"
let valid_float_lexem s =
let l = string_length s in
let rec loop i =
if i >= l then s ^ "." else
- match s.[i] with
+ match string_get s i with
| '0' .. '9' | '-' -> loop (i + 1)
| _ -> s
in
@@ -265,19 +288,29 @@ let flush_all () =
| a :: l -> (try flush a with _ -> ()); iter l
in iter (out_channels_list ())
-external unsafe_output : out_channel -> string -> int -> int -> unit
+external unsafe_output : out_channel -> bytes -> int -> int -> unit
= "caml_ml_output"
+external unsafe_output_string : out_channel -> string -> int -> int -> unit
+ = "caml_ml_output"
external output_char : out_channel -> char -> unit = "caml_ml_output_char"
+let output_bytes oc s =
+ unsafe_output oc s 0 (bytes_length s)
+
let output_string oc s =
- unsafe_output oc s 0 (string_length s)
+ unsafe_output_string oc s 0 (string_length s)
let output oc s ofs len =
- if ofs < 0 || len < 0 || ofs > string_length s - len
+ if ofs < 0 || len < 0 || ofs > bytes_length s - len
then invalid_arg "output"
else unsafe_output oc s ofs len
+let output_substring oc s ofs len =
+ if ofs < 0 || len < 0 || ofs > string_length s - len
+ then invalid_arg "output_substring"
+ else unsafe_output_string oc s ofs len
+
external output_byte : out_channel -> int -> unit = "caml_ml_output_char"
external output_binary_int : out_channel -> int -> unit = "caml_ml_output_int"
@@ -309,11 +342,11 @@ let open_in_bin name =
external input_char : in_channel -> char = "caml_ml_input_char"
-external unsafe_input : in_channel -> string -> int -> int -> int
+external unsafe_input : in_channel -> bytes -> int -> int -> int
= "caml_ml_input"
let input ic s ofs len =
- if ofs < 0 || len < 0 || ofs > string_length s - len
+ if ofs < 0 || len < 0 || ofs > bytes_length s - len
then invalid_arg "input"
else unsafe_input ic s ofs len
@@ -326,39 +359,44 @@ let rec unsafe_really_input ic s ofs len =
end
let really_input ic s ofs len =
- if ofs < 0 || len < 0 || ofs > string_length s - len
+ if ofs < 0 || len < 0 || ofs > bytes_length s - len
then invalid_arg "really_input"
else unsafe_really_input ic s ofs len
+let really_input_string ic len =
+ let s = bytes_create len in
+ really_input ic s 0 len;
+ bytes_unsafe_to_string s
+
external input_scan_line : in_channel -> int = "caml_ml_input_scan_line"
let input_line chan =
let rec build_result buf pos = function
[] -> buf
| hd :: tl ->
- let len = string_length hd in
- string_blit hd 0 buf (pos - len) len;
+ let len = bytes_length hd in
+ bytes_blit hd 0 buf (pos - len) len;
build_result buf (pos - len) tl in
let rec scan accu len =
let n = input_scan_line chan in
if n = 0 then begin (* n = 0: we are at EOF *)
match accu with
[] -> raise End_of_file
- | _ -> build_result (string_create len) len accu
+ | _ -> build_result (bytes_create len) len accu
end else if n > 0 then begin (* n > 0: newline found in buffer *)
- let res = string_create (n - 1) in
+ let res = bytes_create (n - 1) in
ignore (unsafe_input chan res 0 (n - 1));
ignore (input_char chan); (* skip the newline *)
match accu with
[] -> res
| _ -> let len = len + n - 1 in
- build_result (string_create len) len (res :: accu)
+ build_result (bytes_create len) len (res :: accu)
end else begin (* n < 0: newline not found *)
- let beg = string_create (-n) in
+ let beg = bytes_create (-n) in
ignore(unsafe_input chan beg 0 (-n));
scan (beg :: accu) (len - n)
end
- in scan [] 0
+ in bytes_unsafe_to_string (scan [] 0)
external input_byte : in_channel -> int = "caml_ml_input_char"
external input_binary_int : in_channel -> int = "caml_ml_input_int"
@@ -375,6 +413,7 @@ external set_binary_mode_in : in_channel -> bool -> unit
let print_char c = output_char stdout c
let print_string s = output_string stdout s
+let print_bytes s = output_bytes stdout s
let print_int i = output_string stdout (string_of_int i)
let print_float f = output_string stdout (string_of_float f)
let print_endline s =
@@ -385,6 +424,7 @@ let print_newline () = output_char stdout '\n'; flush stdout
let prerr_char c = output_char stderr c
let prerr_string s = output_string stderr s
+let prerr_bytes s = output_bytes stderr s
let prerr_int i = output_string stderr (string_of_int i)
let prerr_float f = output_string stderr (string_of_float f)
let prerr_endline s =
@@ -410,43 +450,26 @@ module LargeFile =
external in_channel_length : in_channel -> int64 = "caml_ml_channel_size_64"
end
-(* References *)
+(* Formats *)
-type 'a ref = { mutable contents : 'a }
-external ref : 'a -> 'a ref = "%makemutable"
-external ( ! ) : 'a ref -> 'a = "%field0"
-external ( := ) : 'a ref -> 'a -> unit = "%setfield0"
-external incr : int ref -> unit = "%incr"
-external decr : int ref -> unit = "%decr"
+type ('a, 'b, 'c, 'd, 'e, 'f) format6
+ = ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6
+ = Format of ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt
+ * string
-(* Formats *)
type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6
type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4
+let string_of_format (Format (fmt, str)) = str
+
external format_of_string :
('a, 'b, 'c, 'd, 'e, 'f) format6 ->
('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity"
-external format_to_string :
- ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string = "%identity"
-external string_to_format :
- string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity"
-
-let (( ^^ ) :
- ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
- ('f, 'b, 'c, 'e, 'g, 'h) format6 ->
- ('a, 'b, 'c, 'd, 'g, 'h) format6) =
- fun fmt1 fmt2 ->
- string_to_format (format_to_string fmt1 ^ "%," ^ format_to_string fmt2)
-;;
-
-let string_of_format fmt =
- let s = format_to_string fmt in
- let l = string_length s in
- let r = string_create l in
- string_blit s 0 r 0 l;
- r
+let (^^) (Format (fmt1, str1)) (Format (fmt2, str2)) =
+ Format (CamlinternalFormatBasics.concat_fmt fmt1 fmt2,
+ str1 ^ "%," ^ str2)
(* Miscellaneous *)
diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli
index 0a2e4af6a..d471a4ebb 100644
--- a/stdlib/pervasives.mli
+++ b/stdlib/pervasives.mli
@@ -14,8 +14,8 @@
(** The initially opened module.
This module provides the basic operations over the built-in types
- (numbers, booleans, strings, exceptions, references, lists, arrays,
- input-output channels, ...).
+ (numbers, booleans, byte sequences, strings, exceptions, references,
+ lists, arrays, input-output channels, ...).
This module is automatically opened at the beginning of each compilation.
All components of this module can therefore be referred by their short
@@ -68,7 +68,7 @@ external ( <= ) : 'a -> 'a -> bool = "%lessequal"
external ( >= ) : 'a -> 'a -> bool = "%greaterequal"
(** Structural ordering functions. These functions coincide with
- the usual orderings over integers, characters, strings
+ the usual orderings over integers, characters, strings, byte sequences
and floating-point numbers, and extend them to a
total ordering over all types.
The ordering is compatible with [( = )]. As in the case
@@ -107,7 +107,7 @@ val max : 'a -> 'a -> 'a
external ( == ) : 'a -> 'a -> bool = "%eq"
(** [e1 == e2] tests for physical equality of [e1] and [e2].
- On mutable types such as references, arrays, strings, records with
+ On mutable types such as references, arrays, byte sequences, records with
mutable fields and objects with mutable instance variables,
[e1 == e2] is true if and only if physical modification of [e1]
also affects [e2].
@@ -130,6 +130,7 @@ external ( && ) : bool -> bool -> bool = "%sequand"
[e2] is not evaluated at all. *)
external ( & ) : bool -> bool -> bool = "%sequand"
+ [@@ocaml.deprecated]
(** @deprecated {!Pervasives.( && )} should be used instead. *)
external ( || ) : bool -> bool -> bool = "%sequor"
@@ -138,8 +139,47 @@ external ( || ) : bool -> bool -> bool = "%sequor"
[e2] is not evaluated at all. *)
external ( or ) : bool -> bool -> bool = "%sequor"
+ [@@ocaml.deprecated]
(** @deprecated {!Pervasives.( || )} should be used instead.*)
+(** {6 Debugging} *)
+
+external __LOC__ : string = "%loc_LOC"
+(** [__LOC__] returns the location at which this expression appears in
+ the file currently being parsed by the compiler, with the standard
+ error format of OCaml: "File %S, line %d, characters %d-%d" *)
+external __FILE__ : string = "%loc_FILE"
+(** [__FILE__] returns the name of the file currently being
+ parsed by the compiler. *)
+external __LINE__ : int = "%loc_LINE"
+(** [__LINE__] returns the line number at which this expression
+ appears in the file currently being parsed by the compiler. *)
+external __MODULE__ : string = "%loc_MODULE"
+(** [__MODULE__] returns the module name of the file being
+ parsed by the compiler. *)
+external __POS__ : string * int * int * int = "%loc_POS"
+(** [__POS__] returns a tuple [(file,lnum,cnum,enum)], corresponding
+ to the location at which this expression appears in the file
+ currently being parsed by the compiler. [file] is the current
+ filename, [lnum] the line number, [cnum] the character position in
+ the line and [enum] the last character position in the line. *)
+
+external __LOC_OF__ : 'a -> string * 'a = "%loc_LOC"
+(** [__LOC_OF__ expr] returns a pair [(loc, expr)] where [loc] is the
+ location of [expr] in the file currently being parsed by the
+ compiler, with the standard error format of OCaml: "File %S, line
+ %d, characters %d-%d" *)
+external __LINE_OF__ : 'a -> int * 'a = "%loc_LINE"
+(** [__LINE__ expr] returns a pair [(line, expr)], where [line] is the
+ line number at which the expression [expr] appears in the file
+ currently being parsed by the compiler. *)
+external __POS_OF__ : 'a -> (string * int * int * int) * 'a = "%loc_POS"
+(** [__POS_OF__ expr] returns a pair [(expr,loc)], where [loc] is a
+ tuple [(file,lnum,cnum,enum)] corresponding to the location at
+ which the expression [expr] appears in the file currently being
+ parsed by the compiler. [file] is the current filename, [lnum] the
+ line number, [cnum] the character position in the line and [enum]
+ the last character position in the line. *)
(** {6 Composition operators} *)
@@ -553,6 +593,9 @@ val print_char : char -> unit
val print_string : string -> unit
(** Print a string on standard output. *)
+val print_bytes : bytes -> unit
+(** Print a byte sequence on standard output. *)
+
val print_int : int -> unit
(** Print an integer, in decimal, on standard output. *)
@@ -577,6 +620,9 @@ val prerr_char : char -> unit
val prerr_string : string -> unit
(** Print a string on standard error. *)
+val prerr_bytes : bytes -> unit
+(** Print a byte sequence on standard error. *)
+
val prerr_int : int -> unit
(** Print an integer, in decimal, on standard error. *)
@@ -584,8 +630,8 @@ val prerr_float : float -> unit
(** Print a floating-point number, in decimal, on standard error. *)
val prerr_endline : string -> unit
-(** Print a string, followed by a newline character on standard error
- and flush standard error. *)
+(** Print a string, followed by a newline character on standard
+ error and flush standard error. *)
val prerr_newline : unit -> unit
(** Print a newline character on standard error, and flush
@@ -661,11 +707,18 @@ val output_char : out_channel -> char -> unit
val output_string : out_channel -> string -> unit
(** Write the string on the given output channel. *)
-val output : out_channel -> string -> int -> int -> unit
-(** [output oc buf pos len] writes [len] characters from string [buf],
+val output_bytes : out_channel -> bytes -> unit
+(** Write the byte sequence on the given output channel. *)
+
+val output : out_channel -> bytes -> int -> int -> unit
+(** [output oc buf pos len] writes [len] characters from byte sequence [buf],
starting at offset [pos], to the given output channel [oc].
Raise [Invalid_argument "output"] if [pos] and [len] do not
- designate a valid substring of [buf]. *)
+ designate a valid range of [buf]. *)
+
+val output_substring : out_channel -> string -> int -> int -> unit
+(** Same as [output] but take a string as argument instead of
+ a byte sequence. *)
val output_byte : out_channel -> int -> unit
(** Write one 8-bit integer (as the single character with that code)
@@ -756,9 +809,9 @@ val input_line : in_channel -> string
Raise [End_of_file] if the end of the file is reached
at the beginning of line. *)
-val input : in_channel -> string -> int -> int -> int
+val input : in_channel -> bytes -> int -> int -> int
(** [input ic buf pos len] reads up to [len] characters from
- the given channel [ic], storing them in string [buf], starting at
+ the given channel [ic], storing them in byte sequence [buf], starting at
character number [pos].
It returns the actual number of characters read, between 0 and
[len] (inclusive).
@@ -771,15 +824,21 @@ val input : in_channel -> string -> int -> int -> int
if desired. (See also {!Pervasives.really_input} for reading
exactly [len] characters.)
Exception [Invalid_argument "input"] is raised if [pos] and [len]
- do not designate a valid substring of [buf]. *)
+ do not designate a valid range of [buf]. *)
-val really_input : in_channel -> string -> int -> int -> unit
+val really_input : in_channel -> bytes -> int -> int -> unit
(** [really_input ic buf pos len] reads [len] characters from channel [ic],
- storing them in string [buf], starting at character number [pos].
+ storing them in byte sequence [buf], starting at character number [pos].
Raise [End_of_file] if the end of file is reached before [len]
characters have been read.
Raise [Invalid_argument "really_input"] if
- [pos] and [len] do not designate a valid substring of [buf]. *)
+ [pos] and [len] do not designate a valid range of [buf]. *)
+
+val really_input_string : in_channel -> int -> string
+(** [really_input_string ic len] reads [len] characters from channel [ic]
+ and returns them in a new string.
+ Raise [End_of_file] if the end of file is reached before [len]
+ characters have been read. *)
val input_byte : in_channel -> int
(** Same as {!Pervasives.input_char}, but return the 8-bit integer representing
@@ -910,7 +969,7 @@ external decr : int ref -> unit = "%decr"
*)
(** Format strings have a general and highly polymorphic type
- [('a, 'b, 'c, 'd, 'e, 'f) format6]. Type [format6] is built in.
+ [('a, 'b, 'c, 'd, 'e, 'f) format6].
The two simplified types, [format] and [format4] below are
included for backward compatibility with earlier releases of
OCaml.
@@ -949,6 +1008,10 @@ external decr : int ref -> unit = "%decr"
for the [scanf]-style functions, it is typically the result type of the
receiver function.
*)
+
+type ('a, 'b, 'c, 'd, 'e, 'f) format6 =
+ ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6
+
type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6
type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4
@@ -1003,6 +1066,6 @@ val at_exit : (unit -> unit) -> unit
val valid_float_lexem : string -> string
-val unsafe_really_input : in_channel -> string -> int -> int -> unit
+val unsafe_really_input : in_channel -> bytes -> int -> int -> unit
val do_at_exit : unit -> unit
diff --git a/stdlib/printexc.ml b/stdlib/printexc.ml
index 9f20c7b46..4ebb84cea 100644
--- a/stdlib/printexc.ml
+++ b/stdlib/printexc.ml
@@ -82,12 +82,13 @@ let catch fct arg =
eprintf "Uncaught exception: %s\n" (to_string x);
exit 2
-type raw_backtrace
+type raw_backtrace_slot
+type raw_backtrace = raw_backtrace_slot array
external get_raw_backtrace:
unit -> raw_backtrace = "caml_get_exception_raw_backtrace"
-type loc_info =
+type backtrace_slot =
| Known_location of bool (* is_raise *)
* string (* filename *)
* int (* line number *)
@@ -98,29 +99,27 @@ type loc_info =
(* to avoid warning *)
let _ = [Known_location (false, "", 0, 0, 0); Unknown_location false]
-type backtrace = loc_info array
+external convert_raw_backtrace_slot:
+ raw_backtrace_slot -> backtrace_slot = "caml_convert_raw_backtrace_slot"
-external convert_raw_backtrace:
- raw_backtrace -> backtrace option = "caml_convert_raw_backtrace"
+let convert_raw_backtrace rbckt =
+ try Some (Array.map convert_raw_backtrace_slot rbckt)
+ with Failure _ -> None
-let format_loc_info pos li =
- let is_raise =
- match li with
- | Known_location(is_raise, _, _, _, _) -> is_raise
- | Unknown_location(is_raise) -> is_raise in
- let info =
+let format_backtrace_slot pos slot =
+ let info is_raise =
if is_raise then
if pos = 0 then "Raised at" else "Re-raised at"
else
if pos = 0 then "Raised by primitive operation at" else "Called from"
in
- match li with
+ match slot with
+ | Unknown_location true -> (* compiler-inserted re-raise, skipped *) None
+ | Unknown_location false ->
+ Some (sprintf "%s unknown location" (info false))
| Known_location(is_raise, filename, lineno, startchar, endchar) ->
- sprintf "%s file \"%s\", line %d, characters %d-%d"
- info filename lineno startchar endchar
- | Unknown_location(is_raise) ->
- sprintf "%s unknown location"
- info
+ Some (sprintf "%s file \"%s\", line %d, characters %d-%d"
+ (info is_raise) filename lineno startchar endchar)
let print_exception_backtrace outchan backtrace =
match backtrace with
@@ -129,8 +128,9 @@ let print_exception_backtrace outchan backtrace =
"(Program not linked with -g, cannot print stack backtrace)\n"
| Some a ->
for i = 0 to Array.length a - 1 do
- if a.(i) <> Unknown_location true then
- fprintf outchan "%s\n" (format_loc_info i a.(i))
+ match format_backtrace_slot i a.(i) with
+ | None -> ()
+ | Some str -> fprintf outchan "%s\n" str
done
let print_raw_backtrace outchan raw_backtrace =
@@ -147,20 +147,70 @@ let backtrace_to_string backtrace =
| Some a ->
let b = Buffer.create 1024 in
for i = 0 to Array.length a - 1 do
- if a.(i) <> Unknown_location true then
- bprintf b "%s\n" (format_loc_info i a.(i))
+ match format_backtrace_slot i a.(i) with
+ | None -> ()
+ | Some str -> bprintf b "%s\n" str
done;
Buffer.contents b
let raw_backtrace_to_string raw_backtrace =
backtrace_to_string (convert_raw_backtrace raw_backtrace)
+let backtrace_slot_is_raise = function
+ | Known_location(is_raise, _, _, _, _) -> is_raise
+ | Unknown_location(is_raise) -> is_raise
+
+type location = {
+ filename : string;
+ line_number : int;
+ start_char : int;
+ end_char : int;
+}
+
+let backtrace_slot_location = function
+ | Unknown_location _ -> None
+ | Known_location(_is_raise, filename, line_number,
+ start_char, end_char) ->
+ Some {
+ filename;
+ line_number;
+ start_char;
+ end_char;
+ }
+
+let backtrace_slots raw_backtrace =
+ (* The documentation of this function guarantees that Some is
+ returned only if a part of the trace is usable. This gives us
+ a bit more work than just convert_raw_backtrace, but it makes the
+ API more user-friendly -- otherwise most users would have to
+ reimplement the "Program not linked with -g, sorry" logic
+ themselves. *)
+ match convert_raw_backtrace raw_backtrace with
+ | None -> None
+ | Some backtrace ->
+ let usable_slot = function
+ | Unknown_location _ -> false
+ | Known_location _ -> true in
+ let rec exists_usable = function
+ | (-1) -> false
+ | i -> usable_slot backtrace.(i) || exists_usable (i - 1) in
+ if exists_usable (Array.length backtrace - 1)
+ then Some backtrace
+ else None
+
+module Slot = struct
+ type t = backtrace_slot
+ let format = format_backtrace_slot
+ let is_raise = backtrace_slot_is_raise
+ let location = backtrace_slot_location
+end
+
+let raw_backtrace_length bckt = Array.length bckt
+let get_raw_backtrace_slot bckt i = Array.get bckt i
+
(* confusingly named:
returns the *string* corresponding to the global current backtrace *)
let get_backtrace () =
- (* we could use the caml_get_exception_backtrace primitive here, but
- we hope to deprecate it so it's better to just compose the
- raw stuff *)
backtrace_to_string (convert_raw_backtrace (get_raw_backtrace ()))
external record_backtrace: bool -> unit = "caml_record_backtrace"
@@ -169,10 +219,8 @@ external backtrace_status: unit -> bool = "caml_backtrace_status"
let register_printer fn =
printers := fn :: !printers
-
external get_callstack: int -> raw_backtrace = "caml_get_current_callstack"
-
let exn_slot x =
let x = Obj.repr x in
if Obj.tag x = 0 then Obj.field x 0 else x
@@ -184,3 +232,64 @@ let exn_slot_id x =
let exn_slot_name x =
let slot = exn_slot x in
(Obj.obj (Obj.field slot 0) : string)
+
+
+let uncaught_exception_handler = ref None
+
+let set_uncaught_exception_handler fn = uncaught_exception_handler := Some fn
+
+let empty_backtrace : raw_backtrace = Obj.obj (Obj.new_block Obj.abstract_tag 0)
+
+let try_get_raw_backtrace () =
+ try
+ get_raw_backtrace ()
+ with _ (* Out_of_memory? *) ->
+ empty_backtrace
+
+let handle_uncaught_exception' exn debugger_in_use =
+ try
+ (* Get the backtrace now, in case one of the [at_exit] function
+ destroys it. *)
+ let raw_backtrace =
+ if debugger_in_use (* Same test as in [byterun/printexc.c] *) then
+ empty_backtrace
+ else
+ try_get_raw_backtrace ()
+ in
+ (try Pervasives.do_at_exit () with _ -> ());
+ match !uncaught_exception_handler with
+ | None ->
+ eprintf "Fatal error: exception %s\n" (to_string exn);
+ print_raw_backtrace stderr raw_backtrace;
+ flush stderr
+ | Some handler ->
+ try
+ handler exn raw_backtrace
+ with exn' ->
+ let raw_backtrace' = try_get_raw_backtrace () in
+ eprintf "Fatal error: exception %s\n" (to_string exn);
+ print_raw_backtrace stderr raw_backtrace;
+ eprintf "Fatal error in uncaught exception handler: exception %s\n"
+ (to_string exn');
+ print_raw_backtrace stderr raw_backtrace';
+ flush stderr
+ with
+ | Out_of_memory ->
+ prerr_endline
+ "Fatal error: out of memory in uncaught exception handler"
+
+(* This function is called by [caml_fatal_uncaught_exception] in
+ [byterun/printexc.c] which expects no exception is raised. *)
+let handle_uncaught_exception exn debugger_in_use =
+ try
+ handle_uncaught_exception' exn debugger_in_use
+ with _ ->
+ (* There is not much we can do at this point *)
+ ()
+
+external register_named_value : string -> 'a -> unit
+ = "caml_register_named_value"
+
+let () =
+ register_named_value "Printexc.handle_uncaught_exception"
+ handle_uncaught_exception
diff --git a/stdlib/printexc.mli b/stdlib/printexc.mli
index c378d9cb3..6bffe174c 100644
--- a/stdlib/printexc.mli
+++ b/stdlib/printexc.mli
@@ -86,25 +86,46 @@ val register_printer: (exn -> string option) -> unit
(** {6 Raw backtraces} *)
type raw_backtrace
-
-(** The abstract type [backtrace] stores exception backtraces in
+(** The abstract type [raw_backtrace] stores a backtrace in
a low-level format, instead of directly exposing them as string as
the [get_backtrace()] function does.
This allows delaying the formatting of backtraces to when they are
- actually printed, which might be useful if you record more
+ actually printed, which may be useful if you record more
backtraces than you print.
+
+ Raw backtraces cannot be marshalled. If you need marshalling, you
+ should use the array returned by the [backtrace_slots] function of
+ the next section.
+
+ @since 4.01.0
*)
val get_raw_backtrace: unit -> raw_backtrace
+(** [Printexc.get_raw_backtrace ()] returns the same exception
+ backtrace that [Printexc.print_backtrace] would print, but in
+ a raw format.
+
+ @since 4.01.0
+*)
+
val print_raw_backtrace: out_channel -> raw_backtrace -> unit
+(** Print a raw backtrace in the same format
+ [Printexc.print_backtrace] uses.
+
+ @since 4.01.0
+*)
+
val raw_backtrace_to_string: raw_backtrace -> string
+(** Return a string from a raw backtrace, in the same format
+ [Printexc.get_backtrace] uses.
+ @since 4.01.0
+*)
(** {6 Current call stack} *)
val get_callstack: int -> raw_backtrace
-
(** [Printexc.get_callstack n] returns a description of the top of the
call stack on the current program point (for the current thread),
with at most [n] entries. (Note: this function is not related to
@@ -113,6 +134,138 @@ val get_callstack: int -> raw_backtrace
@since 4.01.0
*)
+(** {6 Uncaught exceptions} *)
+
+val set_uncaught_exception_handler: (exn -> raw_backtrace -> unit) -> unit
+(** [Printexc.set_uncaught_exception_handler fn] registers [fn] as the handler
+ for uncaught exceptions. The default handler prints the exception and
+ backtrace on standard error output.
+
+ Note that when [fn] is called all the functions registered with
+ {!Pervasives.at_exit} have already been called. Because of this you must
+ make sure any output channel [fn] writes on is flushed.
+
+ If [fn] raises an exception, it is ignored.
+
+ @since 4.02.0
+*)
+
+
+(** {6 Manipulation of backtrace information}
+
+ Those function allow to traverse the slots of a raw backtrace,
+ extract information from them in a programmer-friendly format.
+*)
+
+type backtrace_slot
+(** The abstract type [backtrace_slot] represents a single slot of
+ a backtrace.
+
+ @since 4.02
+*)
+
+val backtrace_slots : raw_backtrace -> backtrace_slot array option
+(** Returns the slots of a raw backtrace, or [None] if none of them
+ contain useful information.
+
+ In the return array, the slot at index [0] corresponds to the most
+ recent function call, raise, or primitive [get_backtrace] call in
+ the trace.
+
+ Some possible reasons for returning [None] are as follow:
+ - none of the slots in the trace come from modules compiled with
+ debug information ([-g])
+ - the program is a bytecode program that has not been linked with
+ debug information enabled ([ocamlc -g])
+*)
+
+type location = {
+ filename : string;
+ line_number : int;
+ start_char : int;
+ end_char : int;
+}
+(** The type of location information found in backtraces. [start_char]
+ and [end_char] are positions relative to the beginning of the
+ line.
+
+ @since 4.02
+*)
+
+module Slot : sig
+ type t = backtrace_slot
+
+ val is_raise : t -> bool
+ (** [is_raise slot] is [true] when [slot] refers to a raising
+ point in the code, and [false] when it comes from a simple
+ function call.
+
+ @since 4.02
+ *)
+
+ val location : t -> location option
+ (** [location slot] returns the location information of the slot,
+ if available, and [None] otherwise.
+
+ Some possible reasons for failing to return a location are as follow:
+ - the slot corresponds to a compiler-inserted raise
+ - the slot corresponds to a part of the program that has not been
+ compiled with debug information ([-g])
+
+ @since 4.02
+ *)
+
+ val format : int -> t -> string option
+ (** [format pos slot] returns the string representation of [slot] as
+ [raw_backtrace_to_string] would format it, assuming it is the
+ [pos]-th element of the backtrace: the [0]-th element is
+ pretty-printed differently than the others.
+
+ Whole-backtrace printing functions also skip some uninformative
+ slots; in that case, [format pos slot] returns [None].
+
+ @since 4.02
+ *)
+end
+
+
+(** {6 Raw backtrace slots} *)
+
+type raw_backtrace_slot
+(** This type allows direct access to raw backtrace slots, without any
+ conversion in an OCaml-usable data-structure. Being
+ process-specific, they must absolutely not be marshalled, and are
+ unsafe to use for this reason (marshalling them may not fail, but
+ un-marshalling and using the result will result in
+ undefined behavior).
+
+ Elements of this type can still be compared and hashed: when two
+ elements are equal, then they represent the same source location
+ (the converse is not necessarily true in presence of inlining,
+ for example).
+*)
+
+val raw_backtrace_length : raw_backtrace -> int
+(** [raw_backtrace_length bckt] returns the number of slots in the
+ backtrace [bckt].
+
+ @since 4.02
+*)
+
+val get_raw_backtrace_slot : raw_backtrace -> int -> raw_backtrace_slot
+(** [get_slot bckt pos] returns the slot in position [pos] in the
+ backtrace [bckt].
+
+ @since 4.02
+*)
+
+val convert_raw_backtrace_slot : raw_backtrace_slot -> backtrace_slot
+(** Extracts the user-friendly [backtrace_slot] from a low-level
+ [raw_backtrace_slot].
+
+ @since 4.02
+*)
+
(** {6 Exception slots} *)
@@ -130,5 +283,3 @@ val exn_slot_name: exn -> string
@since 4.02.0
*)
-
-
diff --git a/stdlib/printf.ml b/stdlib/printf.ml
index 380169204..1152429f9 100644
--- a/stdlib/printf.ml
+++ b/stdlib/printf.ml
@@ -11,728 +11,29 @@
(* *)
(***********************************************************************)
-external format_float: string -> float -> string
- = "caml_format_float"
-external format_int: string -> int -> string
- = "caml_format_int"
-external format_int32: string -> int32 -> string
- = "caml_int32_format"
-external format_nativeint: string -> nativeint -> string
- = "caml_nativeint_format"
-external format_int64: string -> int64 -> string
- = "caml_int64_format"
-
-module Sformat = struct
-
- type index;;
-
- external unsafe_index_of_int : int -> index = "%identity"
- ;;
- let index_of_int i =
- if i >= 0 then unsafe_index_of_int i
- else failwith ("Sformat.index_of_int: negative argument " ^ string_of_int i)
- ;;
- external int_of_index : index -> int = "%identity"
- ;;
-
- let add_int_index i idx = index_of_int (i + int_of_index idx);;
- let succ_index = add_int_index 1;;
- (* Literal position are one-based (hence pred p instead of p). *)
- let index_of_literal_position p = index_of_int (pred p);;
-
- external length : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int
- = "%string_length"
- ;;
- external get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char
- = "%string_safe_get"
- ;;
- external unsafe_get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char
- = "%string_unsafe_get"
- ;;
- external unsafe_to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string
- = "%identity"
- ;;
- let sub fmt idx len =
- String.sub (unsafe_to_string fmt) (int_of_index idx) len
- ;;
- let to_string fmt = sub fmt (unsafe_index_of_int 0) (length fmt)
- ;;
-
-end
-;;
-
-let bad_conversion sfmt i c =
- invalid_arg
- ("Printf: bad conversion %" ^ String.make 1 c ^ ", at char number " ^
- string_of_int i ^ " in format string \'" ^ sfmt ^ "\'")
-;;
-
-let bad_conversion_format fmt i c =
- bad_conversion (Sformat.to_string fmt) i c
-;;
-
-let incomplete_format fmt =
- invalid_arg
- ("Printf: premature end of format string \'" ^
- Sformat.to_string fmt ^ "\'")
-;;
-
-(* Parses a string conversion to return the specified length and the
- padding direction. *)
-let parse_string_conversion sfmt =
- let rec parse neg i =
- if i >= String.length sfmt then (0, neg) else
- match String.unsafe_get sfmt i with
- | '1'..'9' ->
- (int_of_string
- (String.sub sfmt i (String.length sfmt - i - 1)),
- neg)
- | '-' ->
- parse true (succ i)
- | _ ->
- parse neg (succ i) in
- try parse false 1 with
- | Failure _ -> bad_conversion sfmt 0 's'
-;;
-
-(* Pad a (sub) string into a blank string of length [p],
- on the right if [neg] is true, on the left otherwise. *)
-let pad_string pad_char p neg s i len =
- if p = len && i = 0 then s else
- if p <= len then String.sub s i len else
- let res = String.make p pad_char in
- if neg
- then String.blit s i res 0 len
- else String.blit s i res (p - len) len;
- res
-;;
-
-(* Format a string given a %s format, e.g. %40s or %-20s.
- To do ?: ignore other flags (#, +, etc). *)
-let format_string sfmt s =
- let (p, neg) = parse_string_conversion sfmt in
- pad_string ' ' p neg s 0 (String.length s)
-;;
-
-(* Extract a format string out of [fmt] between [start] and [stop] inclusive.
- ['*'] in the format are replaced by integers taken from the [widths] list.
- [extract_format] returns a string which is the string representation of
- the resulting format string. *)
-let extract_format fmt start stop widths =
- let skip_positional_spec start =
- match Sformat.unsafe_get fmt start with
- | '0'..'9' ->
- let rec skip_int_literal i =
- match Sformat.unsafe_get fmt i with
- | '0'..'9' -> skip_int_literal (succ i)
- | '$' -> succ i
- | _ -> start in
- skip_int_literal (succ start)
- | _ -> start in
- let start = skip_positional_spec (succ start) in
- let b = Buffer.create (stop - start + 10) in
- Buffer.add_char b '%';
- let rec fill_format i widths =
- if i <= stop then
- match (Sformat.unsafe_get fmt i, widths) with
- | ('*', h :: t) ->
- Buffer.add_string b (string_of_int h);
- let i = skip_positional_spec (succ i) in
- fill_format i t
- | ('*', []) ->
- assert false (* Should not happen since this is ill-typed. *)
- | (c, _) ->
- Buffer.add_char b c;
- fill_format (succ i) widths in
- fill_format start (List.rev widths);
- Buffer.contents b
-;;
-
-let extract_format_int conv fmt start stop widths =
- let sfmt = extract_format fmt start stop widths in
- match conv with
- | 'n' | 'N' ->
- sfmt.[String.length sfmt - 1] <- 'u';
- sfmt
- | _ -> sfmt
-;;
-
-let extract_format_float conv fmt start stop widths =
- let sfmt = extract_format fmt start stop widths in
- match conv with
- | 'F' ->
- sfmt.[String.length sfmt - 1] <- 'g';
- sfmt
- | _ -> sfmt
-;;
-
-(* Returns the position of the next character following the meta format
- string, starting from position [i], inside a given format [fmt].
- According to the character [conv], the meta format string is
- enclosed by the delimiters %{ and %} (when [conv = '{']) or %( and
- %) (when [conv = '(']). Hence, [sub_format] returns the index of
- the character following the [')'] or ['}'] that ends the meta format,
- according to the character [conv]. *)
-let sub_format incomplete_format bad_conversion_format conv fmt i =
- let len = Sformat.length fmt in
- let rec sub_fmt c i =
- let close = if c = '(' then ')' else (* '{' *) '}' in
- let rec sub j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | '%' -> sub_sub (succ j)
- | _ -> sub (succ j)
- and sub_sub j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | '(' | '{' as c ->
- let j = sub_fmt c (succ j) in
- sub (succ j)
- | '}' | ')' as c ->
- if c = close then succ j else bad_conversion_format fmt i c
- | _ -> sub (succ j) in
- sub i in
- sub_fmt conv i
-;;
-
-let sub_format_for_printf conv =
- sub_format incomplete_format bad_conversion_format conv
-;;
-
-let iter_on_format_args fmt add_conv add_char =
-
- let lim = Sformat.length fmt - 1 in
-
- let rec scan_flags skip i =
- if i > lim then incomplete_format fmt else
- match Sformat.unsafe_get fmt i with
- | '*' -> scan_flags skip (add_conv skip i 'i')
- (* | '$' -> scan_flags skip (succ i) *** PR#4321 *)
- | '#' | '-' | ' ' | '+' -> scan_flags skip (succ i)
- | '_' -> scan_flags true (succ i)
- | '0'..'9'
- | '.' -> scan_flags skip (succ i)
- | _ -> scan_conv skip i
- and scan_conv skip i =
- if i > lim then incomplete_format fmt else
- match Sformat.unsafe_get fmt i with
- | '%' | '@' | '!' | ',' -> succ i
- | 's' | 'S' | '[' -> add_conv skip i 's'
- | 'c' | 'C' -> add_conv skip i 'c'
- | 'd' | 'i' |'o' | 'u' | 'x' | 'X' | 'N' -> add_conv skip i 'i'
- | 'f' | 'e' | 'E' | 'g' | 'G' | 'F' -> add_conv skip i 'f'
- | 'B' | 'b' -> add_conv skip i 'B'
- | 'a' | 'r' | 't' as conv -> add_conv skip i conv
- | 'l' | 'n' | 'L' as conv ->
- let j = succ i in
- if j > lim then add_conv skip i 'i' else begin
- match Sformat.get fmt j with
- | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' ->
- add_char (add_conv skip i conv) 'i'
- | _ -> add_conv skip i 'i' end
- | '{' as conv ->
- (* Just get a regular argument, skipping the specification. *)
- let i = add_conv skip i conv in
- (* To go on, find the index of the next char after the meta format. *)
- let j = sub_format_for_printf conv fmt i in
- (* Add the meta specification to the summary anyway. *)
- let rec loop i =
- if i < j - 2 then loop (add_char i (Sformat.get fmt i)) in
- loop i;
- (* Go on, starting at the closing brace to properly close the meta
- specification in the summary. *)
- scan_conv skip (j - 1)
- | '(' as conv ->
- (* Use the static format argument specification instead of
- the runtime format argument value: they must have the same type
- anyway. *)
- scan_fmt (add_conv skip i conv)
- | '}' | ')' as conv -> add_conv skip i conv
- | conv -> bad_conversion_format fmt i conv
-
- and scan_fmt i =
- if i < lim then
- if Sformat.get fmt i = '%'
- then scan_fmt (scan_flags false (succ i))
- else scan_fmt (succ i)
- else i in
-
- ignore (scan_fmt 0)
-;;
-
-(* Returns a string that summarizes the typing information that a given
- format string contains.
- For instance, [summarize_format_type "A number %d\n"] is "%i".
- It also checks the well-formedness of the format string. *)
-let summarize_format_type fmt =
- let len = Sformat.length fmt in
- let b = Buffer.create len in
- let add_char i c = Buffer.add_char b c; succ i in
- let add_conv skip i c =
- if skip then Buffer.add_string b "%_" else Buffer.add_char b '%';
- add_char i c in
- iter_on_format_args fmt add_conv add_char;
- Buffer.contents b
-;;
-
-module Ac = struct
- type ac = {
- mutable ac_rglr : int;
- mutable ac_skip : int;
- mutable ac_rdrs : int;
- }
-end
-;;
-
-open Ac;;
-
-(* Computes the number of arguments of a format (including the flag
- arguments if any). *)
-let ac_of_format fmt =
- let ac = { ac_rglr = 0; ac_skip = 0; ac_rdrs = 0; } in
- let incr_ac skip c =
- let inc = if c = 'a' then 2 else 1 in
- if c = 'r' then ac.ac_rdrs <- ac.ac_rdrs + 1;
- if skip
- then ac.ac_skip <- ac.ac_skip + inc
- else ac.ac_rglr <- ac.ac_rglr + inc in
- let add_conv skip i c =
- (* Just finishing a meta format: no additional argument to record. *)
- if c <> ')' && c <> '}' then incr_ac skip c;
- succ i
- and add_char i _ = succ i in
-
- iter_on_format_args fmt add_conv add_char;
- ac
-;;
-
-let count_printing_arguments_of_format fmt =
- let ac = ac_of_format fmt in
- (* For printing, only the regular arguments have to be counted. *)
- ac.ac_rglr
-;;
-
-let list_iter_i f l =
- let rec loop i = function
- | [] -> ()
- | [x] -> f i x (* Tail calling [f] *)
- | x :: xs -> f i x; loop (succ i) xs in
- loop 0 l
-;;
-
-(* 'Abstracting' version of kprintf: returns a (curried) function that
- will print when totally applied.
- Note: in the following, we are careful not to be badly caught
- by the compiler optimizations for the representation of arrays. *)
-let kapr kpr fmt =
- match count_printing_arguments_of_format fmt with
- | 0 -> kpr fmt [||]
- | 1 -> Obj.magic (fun x ->
- let a = Array.make 1 (Obj.repr 0) in
- a.(0) <- x;
- kpr fmt a)
- | 2 -> Obj.magic (fun x y ->
- let a = Array.make 2 (Obj.repr 0) in
- a.(0) <- x; a.(1) <- y;
- kpr fmt a)
- | 3 -> Obj.magic (fun x y z ->
- let a = Array.make 3 (Obj.repr 0) in
- a.(0) <- x; a.(1) <- y; a.(2) <- z;
- kpr fmt a)
- | 4 -> Obj.magic (fun x y z t ->
- let a = Array.make 4 (Obj.repr 0) in
- a.(0) <- x; a.(1) <- y; a.(2) <- z;
- a.(3) <- t;
- kpr fmt a)
- | 5 -> Obj.magic (fun x y z t u ->
- let a = Array.make 5 (Obj.repr 0) in
- a.(0) <- x; a.(1) <- y; a.(2) <- z;
- a.(3) <- t; a.(4) <- u;
- kpr fmt a)
- | 6 -> Obj.magic (fun x y z t u v ->
- let a = Array.make 6 (Obj.repr 0) in
- a.(0) <- x; a.(1) <- y; a.(2) <- z;
- a.(3) <- t; a.(4) <- u; a.(5) <- v;
- kpr fmt a)
- | nargs ->
- let rec loop i args =
- if i >= nargs then
- let a = Array.make nargs (Obj.repr 0) in
- list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args;
- kpr fmt a
- else Obj.magic (fun x -> loop (succ i) (x :: args)) in
- loop 0 []
-;;
-
-type positional_specification =
- | Spec_none | Spec_index of Sformat.index
-;;
-
-(* To scan an optional positional parameter specification,
- i.e. an integer followed by a [$].
-
- Calling [got_spec] with appropriate arguments, we 'return' a positional
- specification and an index to go on scanning the [fmt] format at hand.
-
- Note that this is optimized for the regular case, i.e. no positional
- parameter, since in this case we juste 'return' the constant
- [Spec_none]; in case we have a positional parameter, we 'return' a
- [Spec_index] [positional_specification] which is a bit more costly.
-
- Note also that we do not support [*$] specifications, since this would
- lead to type checking problems: a [*$] positional specification means
- 'take the next argument to [printf] (which must be an integer value)',
- name this integer value $n$; [*$] now designates parameter $n$.
-
- Unfortunately, the type of a parameter specified via a [*$] positional
- specification should be the type of the corresponding argument to
- [printf], hence this should be the type of the $n$-th argument to [printf]
- with $n$ being the {\em value} of the integer argument defining [*]; we
- clearly cannot statically guess the value of this parameter in the general
- case. Put it another way: this means type dependency, which is completely
- out of scope of the OCaml type algebra. *)
-
-let scan_positional_spec fmt got_spec i =
- match Sformat.unsafe_get fmt i with
- | '0'..'9' as d ->
- let rec get_int_literal accu j =
- match Sformat.unsafe_get fmt j with
- | '0'..'9' as d ->
- get_int_literal (10 * accu + (int_of_char d - 48)) (succ j)
- | '$' ->
- if accu = 0 then
- failwith "printf: bad positional specification (0)." else
- got_spec (Spec_index (Sformat.index_of_literal_position accu)) (succ j)
- (* Not a positional specification: tell so the caller, and go back to
- scanning the format from the original [i] position we were called at
- first. *)
- | _ -> got_spec Spec_none i in
- get_int_literal (int_of_char d - 48) (succ i)
- (* No positional specification: tell so the caller, and go back to scanning
- the format from the original [i] position. *)
- | _ -> got_spec Spec_none i
-;;
-
-(* Get the index of the next argument to printf, according to the given
- positional specification. *)
-let next_index spec n =
- match spec with
- | Spec_none -> Sformat.succ_index n
- | Spec_index _ -> n
-;;
-
-(* Get the index of the actual argument to printf, according to its
- optional positional specification. *)
-let get_index spec n =
- match spec with
- | Spec_none -> n
- | Spec_index p -> p
-;;
-
-(* Format a float argument as a valid OCaml lexeme. *)
-let format_float_lexeme =
-
- (* To be revised: this procedure should be a unique loop that performs the
- validity check and the string lexeme modification at the same time.
- Otherwise, it is too difficult to handle the strange padding facilities
- given by printf. Let alone handling the correct widths indication,
- knowing that we have sometime to add a '.' at the end of the result!
- *)
-
- let make_valid_float_lexeme s =
- (* Check if s is already a valid lexeme:
- in this case do nothing,
- otherwise turn s into a valid OCaml lexeme. *)
- let l = String.length s in
- let rec valid_float_loop i =
- if i >= l then s ^ "." else
- match s.[i] with
- (* Sure, this is already a valid float lexeme. *)
- | '.' | 'e' | 'E' -> s
- | _ -> valid_float_loop (i + 1) in
-
- valid_float_loop 0 in
-
- (fun sfmt x ->
- match classify_float x with
- | FP_normal | FP_subnormal | FP_zero ->
- make_valid_float_lexeme (format_float sfmt x)
- | FP_infinite ->
- if x < 0.0 then "neg_infinity" else "infinity"
- | FP_nan ->
- "nan")
-;;
-
-(* Decode a format string and act on it.
- [fmt] is the [printf] format string, and [pos] points to a [%] character in
- the format string.
- After consuming the appropriate number of arguments and formatting
- them, one of the following five continuations described below is called:
-
- - [cont_s] for outputting a string
- (arguments: arg num, string, next pos)
- - [cont_a] for performing a %a action
- (arguments: arg num, fn, arg, next pos)
- - [cont_t] for performing a %t action
- (arguments: arg num, fn, next pos)
- - [cont_f] for performing a flush action
- (arguments: arg num, next pos)
- - [cont_m] for performing a %( action
- (arguments: arg num, sfmt, next pos)
-
- "arg num" is the index in array [args] of the next argument to [printf].
- "next pos" is the position in [fmt] of the first character following
- the %conversion specification in [fmt]. *)
-
-(* Note: here, rather than test explicitly against [Sformat.length fmt]
- to detect the end of the format, we use [Sformat.unsafe_get] and
- rely on the fact that we'll get a "null" character if we access
- one past the end of the string. These "null" characters are then
- caught by the [_ -> bad_conversion] clauses below.
- Don't do this at home, kids. *)
-let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
-
- let get_arg spec n =
- Obj.magic (args.(Sformat.int_of_index (get_index spec n))) in
-
- let rec scan_positional n widths i =
- let got_spec spec i = scan_flags spec n widths i in
- scan_positional_spec fmt got_spec i
-
- and scan_flags spec n widths i =
- match Sformat.unsafe_get fmt i with
- | '*' ->
- let got_spec wspec i =
- let (width : int) = get_arg wspec n in
- scan_flags spec (next_index wspec n) (width :: widths) i in
- scan_positional_spec fmt got_spec (succ i)
- | '0'..'9'
- | '.' | '#' | '-' | ' ' | '+' -> scan_flags spec n widths (succ i)
- | _ -> scan_conv spec n widths i
-
- and scan_conv spec n widths i =
- match Sformat.unsafe_get fmt i with
- | '%' | '@' as c ->
- cont_s n (String.make 1 c) (succ i)
- | '!' -> cont_f n (succ i)
- | ',' -> cont_s n "" (succ i)
- | 's' | 'S' as conv ->
- let (x : string) = get_arg spec n in
- let x = if conv = 's' then x else "\"" ^ String.escaped x ^ "\"" in
- let s =
- (* Optimize for common case %s *)
- if i = succ pos then x else
- format_string (extract_format fmt pos i widths) x in
- cont_s (next_index spec n) s (succ i)
- | '[' as conv ->
- bad_conversion_format fmt i conv
- | 'c' | 'C' as conv ->
- let (x : char) = get_arg spec n in
- let s =
- if conv = 'c' then String.make 1 x else "'" ^ Char.escaped x ^ "'" in
- cont_s (next_index spec n) s (succ i)
- | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' | 'N' as conv ->
- let (x : int) = get_arg spec n in
- let s =
- format_int (extract_format_int conv fmt pos i widths) x in
- cont_s (next_index spec n) s (succ i)
- | 'f' | 'e' | 'E' | 'g' | 'G' ->
- let (x : float) = get_arg spec n in
- let s = format_float (extract_format fmt pos i widths) x in
- cont_s (next_index spec n) s (succ i)
- | 'F' as conv ->
- let (x : float) = get_arg spec n in
- let s =
- format_float_lexeme
- (if widths = []
- then "%.12g"
- else extract_format_float conv fmt pos i widths)
- x in
- cont_s (next_index spec n) s (succ i)
- | 'B' | 'b' ->
- let (x : bool) = get_arg spec n in
- cont_s (next_index spec n) (string_of_bool x) (succ i)
- | 'a' ->
- let printer = get_arg spec n in
- (* If the printer spec is Spec_none, go on as usual.
- If the printer spec is Spec_index p,
- printer's argument spec is Spec_index (succ_index p). *)
- let n = Sformat.succ_index (get_index spec n) in
- let arg = get_arg Spec_none n in
- cont_a (next_index spec n) printer arg (succ i)
- | 'r' as conv ->
- bad_conversion_format fmt i conv
- | 't' ->
- let printer = get_arg spec n in
- cont_t (next_index spec n) printer (succ i)
- | 'l' | 'n' | 'L' as conv ->
- begin match Sformat.unsafe_get fmt (succ i) with
- | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' ->
- let i = succ i in
- let s =
- match conv with
- | 'l' ->
- let (x : int32) = get_arg spec n in
- format_int32 (extract_format fmt pos i widths) x
- | 'n' ->
- let (x : nativeint) = get_arg spec n in
- format_nativeint (extract_format fmt pos i widths) x
- | _ ->
- let (x : int64) = get_arg spec n in
- format_int64 (extract_format fmt pos i widths) x in
- cont_s (next_index spec n) s (succ i)
- | _ ->
- let (x : int) = get_arg spec n in
- let s = format_int (extract_format_int 'n' fmt pos i widths) x in
- cont_s (next_index spec n) s (succ i)
- end
- | '{' | '(' as conv (* ')' '}' *) ->
- let (xf : ('a, 'b, 'c, 'd, 'e, 'f) format6) = get_arg spec n in
- let i = succ i in
- let i = sub_format_for_printf conv fmt i in
- if conv = '{' (* '}' *) then
- (* Just print the format argument as a specification. *)
- cont_s
- (next_index spec n)
- (summarize_format_type xf)
- i else
- (* Use the format argument instead of the format specification. *)
- cont_m (next_index spec n) xf i
- | (* '(' *) ')' ->
- cont_s n "" (succ i)
- | conv ->
- bad_conversion_format fmt i conv in
-
- scan_positional n [] (succ pos)
-;;
-
-let mkprintf to_s get_out outc outs flush k fmt =
-
- (* [out] is global to this definition of [pr], and must be shared by all its
- recursive calls (if any). *)
- let out = get_out fmt in
- let outc c = outc out c in
- let outs s = outs out s in
-
- let rec pr k n fmt v =
-
- let len = Sformat.length fmt in
-
- let rec doprn n i =
- if i >= len then Obj.magic (k out) else
- match Sformat.unsafe_get fmt i with
- | '%' -> scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
- | c -> outc c; doprn n (succ i)
-
- and cont_s n s i =
- outs s; doprn n i
- and cont_a n printer arg i =
- if to_s then
- outs ((Obj.magic printer : unit -> _ -> string) () arg)
- else
- printer out arg;
- doprn n i
- and cont_t n printer i =
- if to_s then
- outs ((Obj.magic printer : unit -> string) ())
- else
- printer out;
- doprn n i
- and cont_f n i =
- flush out; doprn n i
- and cont_m n xf i =
- let m =
- Sformat.add_int_index
- (count_printing_arguments_of_format xf) n in
- pr (Obj.magic (fun _ -> doprn m i)) n xf v in
-
- doprn n 0 in
-
- let kpr = pr k (Sformat.index_of_int 0) in
-
- kapr kpr fmt
-;;
-
-(**************************************************************
-
- Defining [fprintf] and various flavors of [fprintf].
-
- **************************************************************)
-
-let kfprintf k oc =
- mkprintf false (fun _ -> oc) output_char output_string flush k
-;;
-let ikfprintf k oc = kapr (fun _ _ -> Obj.magic (k oc));;
-
-let fprintf oc = kfprintf ignore oc;;
-let ifprintf oc = ikfprintf ignore oc;;
-let printf fmt = fprintf stdout fmt;;
-let eprintf fmt = fprintf stderr fmt;;
-
-let kbprintf k b =
- mkprintf false (fun _ -> b) Buffer.add_char Buffer.add_string ignore k
-;;
-let bprintf b = kbprintf ignore b;;
-
-let get_buff fmt =
- let len = 2 * Sformat.length fmt in
- Buffer.create len
-;;
-
-let get_contents b =
- let s = Buffer.contents b in
- Buffer.clear b;
- s
-;;
-
-let get_cont k b = k (get_contents b);;
-
-let ksprintf k =
- mkprintf true get_buff Buffer.add_char Buffer.add_string ignore (get_cont k)
-;;
-
-let sprintf fmt = ksprintf (fun s -> s) fmt;;
-
-(**************************************************************
-
- Deprecated stuff.
-
- **************************************************************)
-
-let kprintf = ksprintf;;
-
-(* For OCaml system internal use only: needed to implement modules [Format]
- and [Scanf]. *)
-
-module CamlinternalPr = struct
-
- module Sformat = Sformat;;
-
- module Tformat = struct
-
- type ac =
- Ac.ac = {
- mutable ac_rglr : int;
- mutable ac_skip : int;
- mutable ac_rdrs : int;
- }
- ;;
-
- let ac_of_format = ac_of_format;;
-
- let count_printing_arguments_of_format =
- count_printing_arguments_of_format;;
-
- let sub_format = sub_format;;
-
- let summarize_format_type = summarize_format_type;;
-
- let scan_format = scan_format;;
-
- let kapr = kapr;;
-
- end
- ;;
-
-end
-;;
+open CamlinternalFormatBasics
+open CamlinternalFormat
+
+let kfprintf k o (Format (fmt, _)) =
+ make_printf (fun o acc -> output_acc o acc; k o) o End_of_acc fmt
+let kbprintf k b (Format (fmt, _)) =
+ make_printf (fun b acc -> bufput_acc b acc; k b) b End_of_acc fmt
+let ikfprintf k oc (Format (fmt, _)) =
+ make_printf (fun oc _ -> k oc) oc End_of_acc fmt
+
+let fprintf oc fmt = kfprintf ignore oc fmt
+let bprintf b fmt = kbprintf ignore b fmt
+let ifprintf oc fmt = ikfprintf ignore oc fmt
+let printf fmt = fprintf stdout fmt
+let eprintf fmt = fprintf stderr fmt
+
+let ksprintf k (Format (fmt, _)) =
+ let k' () acc =
+ let buf = Buffer.create 64 in
+ strput_acc buf acc;
+ k (Buffer.contents buf) in
+ make_printf k' () End_of_acc fmt
+
+let sprintf fmt = ksprintf (fun s -> s) fmt
+
+let kprintf = ksprintf
diff --git a/stdlib/printf.mli b/stdlib/printf.mli
index a75a64181..21e28159a 100644
--- a/stdlib/printf.mli
+++ b/stdlib/printf.mli
@@ -163,76 +163,5 @@ val kbprintf : (Buffer.t -> 'a) -> Buffer.t ->
(** Deprecated *)
-val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;;
+val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b
(** A deprecated synonym for [ksprintf]. *)
-
-(**/**)
-
-(* The following is for system use only. Do not call directly. *)
-
-module CamlinternalPr : sig
-
- module Sformat : sig
- type index;;
-
- val index_of_int : int -> index;;
- external int_of_index : index -> int = "%identity";;
- external unsafe_index_of_int : int -> index = "%identity";;
-
- val succ_index : index -> index;;
- val add_int_index : int -> index -> index;;
-
- val sub : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> index -> int -> string;;
- val to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string;;
- external length : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int
- = "%string_length";;
- external get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char
- = "%string_safe_get";;
- external unsafe_to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string
- = "%identity";;
- external unsafe_get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char
- = "%string_unsafe_get";;
-
- end;;
-
- module Tformat : sig
-
- type ac = {
- mutable ac_rglr : int;
- mutable ac_skip : int;
- mutable ac_rdrs : int;
- };;
-
- val ac_of_format : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> ac;;
- val count_printing_arguments_of_format :
- ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int;;
-
- val sub_format :
- (('a, 'b, 'c, 'd, 'e, 'f) format6 -> int) ->
- (('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char -> int) ->
- char ->
- ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
- int ->
- int
-
- val summarize_format_type : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string
-
- val scan_format : ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
- 'g array ->
- Sformat.index ->
- int ->
- (Sformat.index -> string -> int -> 'h) ->
- (Sformat.index -> 'i -> 'j -> int -> 'h) ->
- (Sformat.index -> 'k -> int -> 'h) ->
- (Sformat.index -> int -> 'h) ->
- (Sformat.index -> ('l, 'm, 'n, 'o, 'p, 'q) format6 -> int -> 'h) ->
- 'h
-
- val kapr :
- (('a, 'b, 'c, 'd, 'e, 'f) format6 -> Obj.t array -> 'g) ->
- ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
- 'g
-
- end;;
-
-end;;
diff --git a/stdlib/queue.mli b/stdlib/queue.mli
index 55e898832..5cd1136e4 100644
--- a/stdlib/queue.mli
+++ b/stdlib/queue.mli
@@ -16,7 +16,7 @@
This module implements queues (FIFOs), with in-place modification.
{b Warning} This module is not thread-safe: each {!Queue.t} value
- must be protected from concurrent access (e.g. with a {!Mutex.t}).
+ must be protected from concurrent access (e.g. with a [Mutex.t]).
Failure to do so can lead to a crash.
*)
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml
index 8f694fd3a..2a63ced9a 100644
--- a/stdlib/scanf.ml
+++ b/stdlib/scanf.ml
@@ -11,6 +11,19 @@
(* *)
(***********************************************************************)
+open CamlinternalFormatBasics
+open CamlinternalFormat
+
+(* alias to avoid warning for ambiguity between
+ Pervasives.format6
+ and CamlinternalFormatBasics.format6
+
+ (the former is in fact an alias for the latter,
+ but the ambiguity warning doesn't care)
+*)
+type ('a, 'b, 'c, 'd, 'e, 'f) format6 =
+ ('a, 'b, 'c, 'd, 'e, 'f) Pervasives.format6
+
(* The run-time library for scanners. *)
(* Scanning buffers. *)
@@ -318,17 +331,17 @@ module Scanning : SCANNING = struct
let from_ic scan_close_ic iname ic =
let len = !file_buffer_size in
- let buf = String.create len in
+ let buf = Bytes.create len in
let i = ref 0 in
let lim = ref 0 in
let eof = ref false in
let next () =
- if !i < !lim then begin let c = buf.[!i] in incr i; c end else
+ if !i < !lim then begin let c = Bytes.get buf !i in incr i; c end else
if !eof then raise End_of_file else begin
lim := input ic buf 0 len;
if !lim = 0 then begin eof := true; scan_close_ic ic end else begin
i := 1;
- buf.[0]
+ Bytes.get buf 0
end
end in
create iname next
@@ -402,11 +415,6 @@ end
type ('a, 'b, 'c, 'd) scanner =
('a, Scanning.in_channel, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c
-;;
-
-external string_to_format :
- string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity"
-;;
(* Reporting errors. *)
exception Scan_failure of string;;
@@ -429,33 +437,6 @@ let bad_end_of_input message =
(Printf.sprintf
"scanning of %s failed: \
premature end of file occurred before end of token" message)
-;;
-
-let int_of_width_opt = function
- | None -> max_int
- | Some width -> width
-;;
-
-let int_of_prec_opt = function
- | None -> max_int
- | Some prec -> prec
-;;
-
-module Sformat = Printf.CamlinternalPr.Sformat;;
-module Tformat = Printf.CamlinternalPr.Tformat;;
-
-let bad_conversion fmt i c =
- invalid_arg
- (Printf.sprintf
- "scanf: bad conversion %%%C, at char number %i \
- in format string \'%s\'" c i (Sformat.to_string fmt))
-;;
-
-let incomplete_format fmt =
- invalid_arg
- (Printf.sprintf "scanf: premature end of format string \'%s\'"
- (Sformat.to_string fmt))
-;;
let bad_float () =
bad_input "no dot or exponent part found in float token"
@@ -467,19 +448,15 @@ let character_mismatch_err c ci =
let character_mismatch c ci =
bad_input (character_mismatch_err c ci)
-;;
-
-let format_mismatch_err fmt1 fmt2 =
- Printf.sprintf
- "format read \'%s\' does not match specification \'%s\'" fmt1 fmt2
-;;
-
-let format_mismatch fmt1 fmt2 = bad_input (format_mismatch_err fmt1 fmt2);;
-(* Checking that 2 format strings are type compatible. *)
-let compatible_format_type fmt1 fmt2 =
- Tformat.summarize_format_type (string_to_format fmt1) =
- Tformat.summarize_format_type (string_to_format fmt2);;
+let rec skip_whites ib =
+ let c = Scanning.peek_char ib in
+ if not (Scanning.eof ib) then begin
+ match c with
+ | ' ' | '\t' | '\n' | '\r' ->
+ Scanning.invalidate_current_char ib; skip_whites ib
+ | _ -> ()
+ end
(* Checking that [c] is indeed in the input, then skips it.
In this case, the character [c] has been explicitly specified in the
@@ -496,28 +473,13 @@ let compatible_format_type fmt1 fmt2 =
We are also careful to treat "\r\n" in the input as an end of line marker:
it always matches a '\n' specification in the input format string. *)
let rec check_char ib c =
- let ci = Scanning.checked_peek_char ib in
- if ci = c then Scanning.invalidate_current_char ib else begin
- match ci with
- | '\r' when c = '\n' ->
- Scanning.invalidate_current_char ib; check_char ib '\n'
- | _ -> character_mismatch c ci
- end
-;;
-
-(* Checks that the current char is indeed one of the stopper characters,
- then skips it.
- Be careful that if ib has no more character this procedure should
- just do nothing (since %s@c defaults to the entire rest of the
- buffer, when no character c can be found in the input). *)
-let ignore_stoppers stps ib =
- if stps <> [] && not (Scanning.eof ib) then
- let ci = Scanning.peek_char ib in
- if List.memq ci stps then Scanning.invalidate_current_char ib else
- let sr = String.concat "" (List.map (String.make 1) stps) in
- bad_input
- (Printf.sprintf "looking for one of range %S, found %C" sr ci)
-;;
+ if c = ' ' then skip_whites ib else
+ let ci = Scanning.checked_peek_char ib in
+ if ci = c then Scanning.invalidate_current_char ib else
+ match ci with
+ | '\r' when c = '\n' ->
+ Scanning.invalidate_current_char ib; check_char ib '\n'
+ | _ -> character_mismatch c ci
(* Extracting tokens from the output token buffer. *)
@@ -701,7 +663,7 @@ let scan_optionally_signed_int width ib =
scan_unsigned_int width ib
;;
-let scan_int_conv conv width _prec ib =
+let scan_int_conv conv width ib =
match conv with
| 'b' -> scan_binary_int width ib
| 'd' -> scan_optionally_signed_decimal_int width ib
@@ -791,7 +753,7 @@ let scan_float width precision ib =
scan_exp_part width ib, precision
;;
-let scan_Float width precision ib =
+let scan_caml_float width precision ib =
let width = scan_optionally_signed_decimal_int width ib in
if width = 0 then bad_float () else
let c = Scanning.peek_char ib in
@@ -805,12 +767,11 @@ let scan_Float width precision ib =
| 'e' | 'E' ->
scan_exp_part width ib
| _ -> bad_float ()
-;;
(* Scan a regular string:
stops when encountering a space, if no scanning indication has been given;
- otherwise, stops when encountering one of the characters in the scanning
- indication list [stp].
+ otherwise, stops when encountering the characters in the scanning
+ indication [stp].
It also stops at end of file or when the maximum number of characters has
been read.*)
let scan_string stp width ib =
@@ -818,12 +779,14 @@ let scan_string stp width ib =
if width = 0 then width else
let c = Scanning.peek_char ib in
if Scanning.eof ib then width else
- if stp = [] then
- match c with
- | ' ' | '\t' | '\n' | '\r' -> width
- | c -> loop (Scanning.store_char width ib c) else
- if List.memq c stp then Scanning.skip_char width ib else
- loop (Scanning.store_char width ib c) in
+ match stp with
+ | Some c' when c = c' -> Scanning.skip_char width ib
+ | Some _ -> loop (Scanning.store_char width ib c)
+ | None ->
+ match c with
+ | ' ' | '\t' | '\n' | '\r' -> width
+ | _ -> loop (Scanning.store_char width ib c)
+ in
loop width
;;
@@ -925,7 +888,7 @@ let scan_backslash_char width ib =
;;
(* Scan a character (an OCaml token). *)
-let scan_Char width ib =
+let scan_caml_char width ib =
let rec find_start width =
match Scanning.checked_peek_char ib with
@@ -948,7 +911,7 @@ let scan_Char width ib =
;;
(* Scan a delimited string (an OCaml token). *)
-let scan_String width ib =
+let scan_caml_string width ib =
let rec find_start width =
match Scanning.checked_peek_char ib with
@@ -981,8 +944,7 @@ let scan_String width ib =
;;
(* Scan a boolean (an OCaml token). *)
-let scan_bool width ib =
- if width < 4 then bad_token_length "a boolean" else
+let scan_bool ib =
let c = Scanning.checked_peek_char ib in
let m =
match c with
@@ -991,560 +953,384 @@ let scan_bool width ib =
| c ->
bad_input
(Printf.sprintf "the character %C cannot start a boolean" c) in
- scan_string [] (min width m) ib
-;;
-
-(* Reading char sets in %[...] conversions. *)
-type char_set =
- | Pos_set of string (* Positive (regular) set. *)
- | Neg_set of string (* Negative (complementary) set. *)
-;;
-
-
-(* Char sets are read as sub-strings in the format string. *)
-let scan_range fmt j =
-
- let len = Sformat.length fmt in
-
- let buffer = Buffer.create len in
-
- let rec scan_closing j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | ']' -> j, Buffer.contents buffer
- | '%' ->
- let j = j + 1 in
- if j >= len then incomplete_format fmt else
- begin match Sformat.get fmt j with
- | '%' | '@' as c ->
- Buffer.add_char buffer c;
- scan_closing (j + 1)
- | c -> bad_conversion fmt j c
- end
- | c ->
- Buffer.add_char buffer c;
- scan_closing (j + 1) in
-
- let scan_first_pos j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | ']' as c ->
- Buffer.add_char buffer c;
- scan_closing (j + 1)
- | _ -> scan_closing j in
-
- let scan_first_neg j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | '^' ->
- let j = j + 1 in
- let k, char_set = scan_first_pos j in
- k, Neg_set char_set
- | _ ->
- let k, char_set = scan_first_pos j in
- k, Pos_set char_set in
-
- scan_first_neg j
-;;
-
-(* Char sets are now represented as bit vectors that are represented as
- byte strings. *)
-
-(* Bit manipulations into bytes. *)
-let set_bit_of_byte byte idx b =
- (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx)))
-;;
-
-let get_bit_of_byte byte idx = (byte lsr idx) land 1;;
-
-(* Bit manipulations in vectors of bytes represented as strings. *)
-let set_bit_of_range r c b =
- let idx = c land 0x7 in
- let ydx = c lsr 3 in
- let byte = r.[ydx] in
- r.[ydx] <- char_of_int (set_bit_of_byte (int_of_char byte) idx b)
-;;
-
-let get_bit_of_range r c =
- let idx = c land 0x7 in
- let ydx = c lsr 3 in
- let byte = r.[ydx] in
- get_bit_of_byte (int_of_char byte) idx
-;;
-
-(* Char sets represented as bit vectors represented as fixed length byte
- strings. *)
-(* Create a full or empty set of chars. *)
-let make_range bit =
- let c = char_of_int (if bit = 0 then 0 else 0xFF) in
- String.make 32 c
-;;
-
-(* Test if a char belongs to a set of chars. *)
-let get_char_in_range r c = get_bit_of_range r (int_of_char c);;
-
-let bit_not b = (lnot b) land 1;;
-
-(* Build the bit vector corresponding to the set of characters
- that belongs to the string argument [set].
- (In the [Scanf] module [set] is always a sub-string of the format.) *)
-let make_char_bit_vect bit set =
- let r = make_range (bit_not bit) in
- let lim = String.length set - 1 in
- let rec loop bit rp i =
- if i <= lim then
- match set.[i] with
- | '-' when rp ->
- (* if i = 0 then rp is false (since the initial call is
- loop bit false 0). Hence i >= 1 and the following is safe. *)
- let c1 = set.[i - 1] in
- let i = succ i in
- if i > lim then loop bit false (i - 1) else
- let c2 = set.[i] in
- for j = int_of_char c1 to int_of_char c2 do
- set_bit_of_range r j bit done;
- loop bit false (succ i)
- | _ ->
- set_bit_of_range r (int_of_char set.[i]) bit;
- loop bit true (succ i) in
- loop bit false 0;
- r
-;;
-
-(* Compute the predicate on chars corresponding to a char set. *)
-let make_predicate bit set stp =
- let r = make_char_bit_vect bit set in
- List.iter
- (fun c -> set_bit_of_range r (int_of_char c) (bit_not bit)) stp;
- (fun c -> get_char_in_range r c)
-;;
-
-let make_setp stp char_set =
- match char_set with
- | Pos_set set ->
- begin match String.length set with
- | 0 -> (fun _ -> 0)
- | 1 ->
- let p = set.[0] in
- (fun c -> if c == p then 1 else 0)
- | 2 ->
- let p1 = set.[0] and p2 = set.[1] in
- (fun c -> if c == p1 || c == p2 then 1 else 0)
- | 3 ->
- let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in
- if p2 = '-' then make_predicate 1 set stp else
- (fun c -> if c == p1 || c == p2 || c == p3 then 1 else 0)
- | _ -> make_predicate 1 set stp
- end
- | Neg_set set ->
- begin match String.length set with
- | 0 -> (fun _ -> 1)
- | 1 ->
- let p = set.[0] in
- (fun c -> if c != p then 1 else 0)
- | 2 ->
- let p1 = set.[0] and p2 = set.[1] in
- (fun c -> if c != p1 && c != p2 then 1 else 0)
- | 3 ->
- let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in
- if p2 = '-' then make_predicate 0 set stp else
- (fun c -> if c != p1 && c != p2 && c != p3 then 1 else 0)
- | _ -> make_predicate 0 set stp
- end
-;;
-
-let setp_table = Hashtbl.create 7;;
-
-let add_setp stp char_set setp =
- let char_set_tbl =
- try Hashtbl.find setp_table char_set with
- | Not_found ->
- let char_set_tbl = Hashtbl.create 3 in
- Hashtbl.add setp_table char_set char_set_tbl;
- char_set_tbl in
- Hashtbl.add char_set_tbl stp setp
-;;
+ scan_string None m ib
-let find_setp stp char_set =
- try Hashtbl.find (Hashtbl.find setp_table char_set) stp with
- | Not_found ->
- let setp = make_setp stp char_set in
- add_setp stp char_set setp;
- setp
-;;
-
-let scan_chars_in_char_set stp char_set width ib =
- let rec loop_pos1 cp1 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c == cp1
- then loop_pos1 cp1 (Scanning.store_char width ib c)
- else width
- and loop_pos2 cp1 cp2 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c == cp1 || c == cp2
- then loop_pos2 cp1 cp2 (Scanning.store_char width ib c)
- else width
- and loop_pos3 cp1 cp2 cp3 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c == cp1 || c == cp2 || c == cp3
- then loop_pos3 cp1 cp2 cp3 (Scanning.store_char width ib c)
- else width
- and loop_neg1 cp1 width =
- if width = 0 then width else
+(* Scan a string containing elements in char_set and terminated by scan_indic
+ if provided. *)
+let scan_chars_in_char_set char_set scan_indic width ib =
+ let rec scan_chars i stp =
let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c != cp1
- then loop_neg1 cp1 (Scanning.store_char width ib c)
- else width
- and loop_neg2 cp1 cp2 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c != cp1 && c != cp2
- then loop_neg2 cp1 cp2 (Scanning.store_char width ib c)
- else width
- and loop_neg3 cp1 cp2 cp3 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c != cp1 && c != cp2 && c != cp3
- then loop_neg3 cp1 cp2 cp3 (Scanning.store_char width ib c)
- else width
- and loop setp width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if setp c == 1
- then loop setp (Scanning.store_char width ib c)
- else width in
-
- let width =
- match char_set with
- | Pos_set set ->
- begin match String.length set with
- | 0 -> loop (fun _ -> 0) width
- | 1 -> loop_pos1 set.[0] width
- | 2 -> loop_pos2 set.[0] set.[1] width
- | 3 when set.[1] != '-' -> loop_pos3 set.[0] set.[1] set.[2] width
- | _ -> loop (find_setp stp char_set) width end
- | Neg_set set ->
- begin match String.length set with
- | 0 -> loop (fun _ -> 1) width
- | 1 -> loop_neg1 set.[0] width
- | 2 -> loop_neg2 set.[0] set.[1] width
- | 3 when set.[1] != '-' -> loop_neg3 set.[0] set.[1] set.[2] width
- | _ -> loop (find_setp stp char_set) width end in
- ignore_stoppers stp ib;
- width
-;;
-
-let get_count t ib =
- match t with
- | 'l' -> Scanning.line_count ib
- | 'n' -> Scanning.char_count ib
- | _ -> Scanning.token_count ib
-;;
-
-let rec skip_whites ib =
- let c = Scanning.peek_char ib in
- if not (Scanning.eof ib) then begin
- match c with
- | ' ' | '\t' | '\n' | '\r' ->
- Scanning.invalidate_current_char ib; skip_whites ib
- | _ -> ()
- end
-;;
+ if i > 0 && not (Scanning.eof ib) && is_in_char_set char_set c &&
+ int_of_char c <> stp then
+ let _ = Scanning.store_char max_int ib c in
+ scan_chars (i - 1) stp;
+ in
+ match scan_indic with
+ | None -> scan_chars width (-1);
+ | Some c ->
+ scan_chars width (int_of_char c);
+ if not (Scanning.eof ib) then
+ let ci = Scanning.peek_char ib in
+ if c = ci then Scanning.invalidate_current_char ib
+ else character_mismatch c ci
(* The global error report function for [Scanf]. *)
let scanf_bad_input ib = function
| Scan_failure s | Failure s ->
let i = Scanning.char_count ib in
- bad_input (Printf.sprintf "scanf: bad input at char number %i: \'%s\'" i s)
+ bad_input (Printf.sprintf "scanf: bad input at char number %i: %S" i s)
| x -> raise x
-;;
-let list_iter_i f l =
- let rec loop i = function
- | [] -> ()
- | [x] -> f i x (* Tail calling [f] *)
- | x :: xs -> f i x; loop (succ i) xs in
- loop 0 l
-;;
+(* Get the content of a counter from an input buffer. *)
+let get_counter ib counter = match counter with
+ | Line_counter -> Scanning.line_count ib
+ | Char_counter -> Scanning.char_count ib
+ | Token_counter -> Scanning.token_count ib
-let ascanf sc fmt =
- let ac = Tformat.ac_of_format fmt in
- match ac.Tformat.ac_rdrs with
- | 0 ->
- Obj.magic (fun f -> sc fmt [||] f)
- | 1 ->
- Obj.magic (fun x f -> sc fmt [| Obj.repr x |] f)
- | 2 ->
- Obj.magic (fun x y f -> sc fmt [| Obj.repr x; Obj.repr y; |] f)
- | 3 ->
- Obj.magic
- (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f)
- | nargs ->
- let rec loop i args =
- if i >= nargs then
- let a = Array.make nargs (Obj.repr 0) in
- list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args;
- Obj.magic (fun f -> sc fmt a f)
- else Obj.magic (fun x -> loop (succ i) (x :: args)) in
- loop 0 []
-;;
-
-(* The [scan_format] main scanning function.
- It takes as arguments:
- - an input buffer [ib] from which to read characters,
- - an error handling function [ef],
- - a format [fmt] that specifies what to read in the input,
- - a vector of user's defined readers [rv],
- - and a function [f] to pass the tokens read to.
-
- Then [scan_format] scans the format and the input buffer in parallel to
- find out tokens as specified by the format; when it finds one token, it
- converts it as specified, remembers the converted value as a future
- argument to the function [f], and continues scanning.
-
- If the entire scanning succeeds (i.e. the format string has been
- exhausted and the buffer has provided tokens according to the
- format string), [f] is applied to the tokens read.
-
- If the scanning or some conversion fails, the main scanning function
- aborts and applies the scanning buffer and a string that explains
- the error to the error handling function [ef] (the error continuation). *)
-
-let scan_format ib ef fmt rv f =
-
- let limr = Array.length rv - 1 in
-
- let return v = Obj.magic v () in
- let delay f x () = f x in
- let stack f = delay (return f) in
- let no_stack f _x = f in
-
- let rec scan fmt =
-
- let lim = Sformat.length fmt - 1 in
-
- let rec scan_fmt ir f i =
- if i > lim then ir, f else
- match Sformat.unsafe_get fmt i with
- | '%' -> scan_skip ir f (succ i)
- | ' ' -> skip_whites ib; scan_fmt ir f (succ i)
- | c -> check_char ib c; scan_fmt ir f (succ i)
-
- and scan_skip ir f i =
- if i > lim then ir, f else
- match Sformat.get fmt i with
- | '_' -> scan_limits true ir f (succ i)
- | _ -> scan_limits false ir f i
-
- and scan_limits skip ir f i =
-
- let rec scan_width i =
- if i > lim then incomplete_format fmt else
- match Sformat.get fmt i with
- | '0' .. '9' as conv ->
- let width, i =
- read_int_literal (decimal_value_of_char conv) (succ i) in
- Some width, i
- | _ -> None, i
-
- and scan_precision i =
- begin
- match Sformat.get fmt i with
- | '.' ->
- let precision, i = read_int_literal 0 (succ i) in
- (Some precision, i)
- | _ -> None, i
- end
+(* Compute the width of a padding option (see "%42{" and "%123("). *)
+let width_of_pad_opt pad_opt = match pad_opt with
+ | None -> max_int
+ | Some width -> width
- and read_int_literal accu i =
- if i > lim then accu, i else
- match Sformat.unsafe_get fmt i with
- | '0' .. '9' as c ->
- let accu = 10 * accu + decimal_value_of_char c in
- read_int_literal accu (succ i)
- | _ -> accu, i in
-
- if i > lim then ir, f else
- let width_opt, i = scan_width i in
- let prec_opt, i = scan_precision i in
- scan_conversion skip width_opt prec_opt ir f i
-
- and scan_conversion skip width_opt prec_opt ir f i =
- let stack = if skip then no_stack else stack in
- let width = int_of_width_opt width_opt in
- let prec = int_of_prec_opt prec_opt in
- match Sformat.get fmt i with
- | '%' | '@' as c ->
- check_char ib c;
- scan_fmt ir f (succ i)
- | '!' ->
- if not (Scanning.end_of_input ib)
- then bad_input "end of input not found" else
- scan_fmt ir f (succ i)
- | ',' ->
- scan_fmt ir f (succ i)
- | 's' ->
- let i, stp = scan_indication (succ i) in
- let _x = scan_string stp width ib in
- scan_fmt ir (stack f (token_string ib)) (succ i)
- | 'S' ->
- let _x = scan_String width ib in
- scan_fmt ir (stack f (token_string ib)) (succ i)
- | '[' (* ']' *) ->
- let i, char_set = scan_range fmt (succ i) in
- let i, stp = scan_indication (succ i) in
- let _x = scan_chars_in_char_set stp char_set width ib in
- scan_fmt ir (stack f (token_string ib)) (succ i)
- | ('c' | 'C') when width = 0 ->
- let c = Scanning.checked_peek_char ib in
- scan_fmt ir (stack f c) (succ i)
- | 'c' ->
- let _x = scan_char width ib in
- scan_fmt ir (stack f (token_char ib)) (succ i)
- | 'C' ->
- let _x = scan_Char width ib in
- scan_fmt ir (stack f (token_char ib)) (succ i)
- | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
- let _x = scan_int_conv conv width prec ib in
- scan_fmt ir (stack f (token_int conv ib)) (succ i)
- | 'N' as conv ->
- scan_fmt ir (stack f (get_count conv ib)) (succ i)
- | 'f' | 'e' | 'E' | 'g' | 'G' ->
- let _x = scan_float width prec ib in
- scan_fmt ir (stack f (token_float ib)) (succ i)
- | 'F' ->
- let _x = scan_Float width prec ib in
- scan_fmt ir (stack f (token_float ib)) (succ i)
-(* | 'B' | 'b' when width = Some 0 ->
- let _x = scan_bool width ib in
- scan_fmt ir (stack f (token_int ib)) (succ i) *)
- | 'B' | 'b' ->
- let _x = scan_bool width ib in
- scan_fmt ir (stack f (token_bool ib)) (succ i)
- | 'r' ->
- if ir > limr then assert false else
- let token = Obj.magic rv.(ir) ib in
- scan_fmt (succ ir) (stack f token) (succ i)
- | 'l' | 'n' | 'L' as conv0 ->
- let i = succ i in
- if i > lim then scan_fmt ir (stack f (get_count conv0 ib)) i else begin
- match Sformat.get fmt i with
- (* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *)
- | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv1 ->
- let _x = scan_int_conv conv1 width prec ib in
- (* Look back to the character that triggered the integer conversion
- (this character is either 'l', 'n' or 'L') to find the
- conversion to apply to the integer token read. *)
- begin match conv0 with
- | 'l' -> scan_fmt ir (stack f (token_int32 conv1 ib)) (succ i)
- | 'n' -> scan_fmt ir (stack f (token_nativeint conv1 ib)) (succ i)
- | _ -> scan_fmt ir (stack f (token_int64 conv1 ib)) (succ i) end
- (* This is not an integer conversion, but a regular %l, %n or %L. *)
- | _ -> scan_fmt ir (stack f (get_count conv0 ib)) i end
- | '(' | '{' as conv (* ')' '}' *) ->
- let i = succ i in
- (* Find [mf], the static specification for the format to read. *)
- let j =
- Tformat.sub_format
- incomplete_format bad_conversion conv fmt i in
- let mf = Sformat.sub fmt (Sformat.index_of_int i) (j - 2 - i) in
- (* Read [rf], the specified format string in the input buffer,
- and check its correctness w.r.t. [mf]. *)
- let _x = scan_String width ib in
- let rf = token_string ib in
- if not (compatible_format_type rf mf) then format_mismatch rf mf else
- (* Proceed according to the kind of metaformat found:
- - %{ mf %} simply returns [rf] as the token read,
- - %( mf %) returns [rf] as the first token read, then
- returns a second token obtained by scanning the input with
- format string [rf].
- Behaviour for %( mf %) is mandatory for sake of format string
- typechecking specification. To get pure format string
- substitution behaviour, you should use %_( mf %) that skips the
- first (format string) token and hence properly substitutes [mf] by
- [rf] in the format string argument.
+let stopper_of_formatting_lit fmting =
+ if fmting = Escaped_percent then '%', "" else
+ let str = string_of_formatting_lit fmting in
+ let stp = str.[1] in
+ let sub_str = String.sub str 2 (String.length str - 2) in
+ stp, sub_str
+
+(******************************************************************************)
+ (* Readers managment *)
+
+(* A call to take_format_readers on a format is evaluated into functions
+ taking readers as arguments and aggregate them into an heterogeneous list *)
+(* When all readers are taken, finally pass the list of the readers to the
+ continuation k. *)
+let rec take_format_readers : type a c d e f .
+ ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, e, f) fmt ->
+ d =
+fun k fmt -> match fmt with
+ | Reader fmt_rest ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_format_readers new_k fmt_rest
+ | Char rest -> take_format_readers k rest
+ | Caml_char rest -> take_format_readers k rest
+ | String (_, rest) -> take_format_readers k rest
+ | Caml_string (_, rest) -> take_format_readers k rest
+ | Int (_, _, _, rest) -> take_format_readers k rest
+ | Int32 (_, _, _, rest) -> take_format_readers k rest
+ | Nativeint (_, _, _, rest) -> take_format_readers k rest
+ | Int64 (_, _, _, rest) -> take_format_readers k rest
+ | Float (_, _, _, rest) -> take_format_readers k rest
+ | Bool rest -> take_format_readers k rest
+ | Alpha rest -> take_format_readers k rest
+ | Theta rest -> take_format_readers k rest
+ | Flush rest -> take_format_readers k rest
+ | String_literal (_, rest) -> take_format_readers k rest
+ | Char_literal (_, rest) -> take_format_readers k rest
+
+ | Scan_char_set (_, _, rest) -> take_format_readers k rest
+ | Scan_get_counter (_, rest) -> take_format_readers k rest
+
+ | Formatting_lit (_, rest) -> take_format_readers k rest
+ | Formatting_gen (Open_tag (Format (fmt, _)), rest) -> take_format_readers k (concat_fmt fmt rest)
+ | Formatting_gen (Open_box (Format (fmt, _)), rest) -> take_format_readers k (concat_fmt fmt rest)
+
+ | Format_arg (_, _, rest) -> take_format_readers k rest
+ | Format_subst (_, fmtty, rest) ->
+ take_fmtty_format_readers k (erase_rel (symm fmtty)) rest
+ | Ignored_param (ign, rest) -> take_ignored_format_readers k ign rest
+
+ | End_of_format -> k Nil
+
+(* Take readers associated to an fmtty coming from a Format_subst "%(...%)". *)
+and take_fmtty_format_readers : type x y a c d e f .
+ ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) fmtty ->
+ (y, Scanning.in_channel, c, x, e, f) fmt -> d =
+fun k fmtty fmt -> match fmtty with
+ | Reader_ty fmt_rest ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_fmtty_format_readers new_k fmt_rest fmt
+ | Ignored_reader_ty fmt_rest ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_fmtty_format_readers new_k fmt_rest fmt
+ | Char_ty rest -> take_fmtty_format_readers k rest fmt
+ | String_ty rest -> take_fmtty_format_readers k rest fmt
+ | Int_ty rest -> take_fmtty_format_readers k rest fmt
+ | Int32_ty rest -> take_fmtty_format_readers k rest fmt
+ | Nativeint_ty rest -> take_fmtty_format_readers k rest fmt
+ | Int64_ty rest -> take_fmtty_format_readers k rest fmt
+ | Float_ty rest -> take_fmtty_format_readers k rest fmt
+ | Bool_ty rest -> take_fmtty_format_readers k rest fmt
+ | Alpha_ty rest -> take_fmtty_format_readers k rest fmt
+ | Theta_ty rest -> take_fmtty_format_readers k rest fmt
+ | Format_arg_ty (_, rest) -> take_fmtty_format_readers k rest fmt
+ | End_of_fmtty -> take_format_readers k fmt
+ | Format_subst_ty (ty1, ty2, rest) ->
+ let ty = trans (symm ty1) ty2 in
+ take_fmtty_format_readers k (concat_fmtty ty rest) fmt
+
+(* Take readers associated to an ignored parameter. *)
+and take_ignored_format_readers : type x y a c d e f .
+ ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) ignored ->
+ (y, Scanning.in_channel, c, x, e, f) fmt -> d =
+fun k ign fmt -> match ign with
+ | Ignored_reader ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_format_readers new_k fmt
+ | Ignored_char -> take_format_readers k fmt
+ | Ignored_caml_char -> take_format_readers k fmt
+ | Ignored_string _ -> take_format_readers k fmt
+ | Ignored_caml_string _ -> take_format_readers k fmt
+ | Ignored_int (_, _) -> take_format_readers k fmt
+ | Ignored_int32 (_, _) -> take_format_readers k fmt
+ | Ignored_nativeint (_, _) -> take_format_readers k fmt
+ | Ignored_int64 (_, _) -> take_format_readers k fmt
+ | Ignored_float (_, _) -> take_format_readers k fmt
+ | Ignored_bool -> take_format_readers k fmt
+ | Ignored_format_arg _ -> take_format_readers k fmt
+ | Ignored_format_subst (_, fmtty) -> take_fmtty_format_readers k fmtty fmt
+ | Ignored_scan_char_set _ -> take_format_readers k fmt
+ | Ignored_scan_get_counter _ -> take_format_readers k fmt
+
+(******************************************************************************)
+ (* Generic scanning *)
+
+(* Make a generic scanning function. *)
+(* Scan a stream according to a format and readers obtained by
+ take_format_readers, and aggegate scanned values into an
+ heterogeneous list. *)
+(* Return the heterogeneous list of scanned values. *)
+let rec make_scanf : type a c d e f .
+ Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt ->
+ (d, _) heter_list -> (a, f) heter_list =
+fun ib fmt readers -> match fmt with
+ | Char rest ->
+ let _ = scan_char 0 ib in
+ let c = token_char ib in
+ Cons (c, make_scanf ib rest readers)
+ | Caml_char rest ->
+ let _ = scan_caml_char 0 ib in
+ let c = token_char ib in
+ Cons (c, make_scanf ib rest readers)
+
+ | String (pad, Formatting_lit (fmting_lit, rest)) ->
+ let stp, str = stopper_of_formatting_lit fmting_lit in
+ let scan width _ ib = scan_string (Some stp) width ib in
+ let str_rest = String_literal (str, rest) in
+ pad_prec_scanf ib str_rest readers pad No_precision scan token_string
+ | String (pad, rest) ->
+ let scan width _ ib = scan_string None width ib in
+ pad_prec_scanf ib rest readers pad No_precision scan token_string
+
+ | Caml_string (pad, rest) ->
+ let scan width _ ib = scan_caml_string width ib in
+ pad_prec_scanf ib rest readers pad No_precision scan token_string
+ | Int (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_int c)
+ | Int32 (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_int32 c)
+ | Nativeint (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_nativeint c)
+ | Int64 (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_int64 c)
+ | Float (Float_F, pad, prec, rest) ->
+ pad_prec_scanf ib rest readers pad prec scan_caml_float token_float
+ | Float ((Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se
+ | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg
+ | Float_G | Float_pG | Float_sG), pad, prec, rest) ->
+ pad_prec_scanf ib rest readers pad prec scan_float token_float
+
+ | Bool rest ->
+ let _ = scan_bool ib in
+ let b = token_bool ib in
+ Cons (b, make_scanf ib rest readers)
+ | Alpha _ ->
+ invalid_arg "scanf: bad conversion \"%a\""
+ | Theta _ ->
+ invalid_arg "scanf: bad conversion \"%t\""
+ | Reader fmt_rest ->
+ let Cons (reader, readers_rest) = readers in
+ let x = reader ib in
+ Cons (x, make_scanf ib fmt_rest readers_rest)
+ | Flush rest ->
+ if Scanning.end_of_input ib then make_scanf ib rest readers
+ else bad_input "end of input not found"
+
+ | String_literal (str, rest) ->
+ String.iter (check_char ib) str;
+ make_scanf ib rest readers
+ | Char_literal (chr, rest) ->
+ check_char ib chr;
+ make_scanf ib rest readers
+
+ | Format_arg (pad_opt, fmtty, rest) ->
+ let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in
+ let s = token_string ib in
+ let fmt =
+ try format_of_string_fmtty s fmtty
+ with Failure msg -> bad_input msg
+ in
+ Cons (fmt, make_scanf ib rest readers)
+ | Format_subst (pad_opt, fmtty, rest) ->
+ let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in
+ let s = token_string ib in
+ let fmt, fmt' =
+ try
+ let Fmt_EBB fmt = fmt_ebb_of_string s in
+ let Fmt_EBB fmt' = fmt_ebb_of_string s in
+ (* TODO: find a way to avoid reparsing twice *)
+
+ (* TODO: these type-checks below *can* fail because of type
+ ambiguity in presence of ignored-readers: "%_r%d" and "%d%_r"
+ are typed in the same way.
+
+ # Scanf.sscanf "\"%_r%d\"3" "%(%d%_r%)" ignore
+ (fun fmt n -> string_of_format fmt, n);;
+ Exception: CamlinternalFormat.Type_mismatch.
+
+ We should properly catch this exception.
*)
- (* For conversion %{%}, just return this format string as the token
- read and go on with the rest of the format string argument. *)
- if conv = '{' (* '}' *) then scan_fmt ir (stack f rf) j else
- (* Or else, return this format string as the first token read;
- then continue scanning using this format string to get
- the following token read;
- finally go on with the rest of the format string argument. *)
- let ir, nf = scan (string_to_format rf) ir (stack f rf) 0 in
- (* Return the format string read and the value just read,
- then go on with the rest of the format. *)
- scan_fmt ir nf j
-
- | c -> bad_conversion fmt i c
-
- and scan_indication j =
- if j > lim then j - 1, [] else
- match Sformat.get fmt j with
- | '@' ->
- let k = j + 1 in
- if k > lim then j - 1, [] else
- begin match Sformat.get fmt k with
- | '%' ->
- let k = k + 1 in
- if k > lim then j - 1, [] else
- begin match Sformat.get fmt k with
- | '%' | '@' as c -> k, [ c ]
- | _c -> j - 1, []
- end
- | c -> k, [ c ]
- end
- | _c -> j - 1, [] in
-
- scan_fmt in
-
-
- Scanning.reset_token ib;
-
- let v =
- try snd (scan fmt 0 (fun () -> f) 0) with
- | (Scan_failure _ | Failure _ | End_of_file) as exc ->
- stack (delay ef ib) exc in
- return v
-;;
-
-let mkscanf ib ef fmt =
- let sc = scan_format ib ef in
- ascanf sc fmt
-;;
-
-let kscanf ib ef fmt = mkscanf ib ef fmt;;
-
-let bscanf ib = kscanf ib scanf_bad_input;;
-
-let fscanf ic = bscanf (Scanning.from_channel ic);;
-
-let sscanf : string -> ('a, 'b, 'c, 'd) scanner
- = fun s -> bscanf (Scanning.from_string s);;
-
-let scanf fmt = bscanf Scanning.stdib fmt;;
-
-let bscanf_format ib fmt f =
- let fmt = Sformat.unsafe_to_string fmt in
- let fmt1 =
- ignore (scan_String max_int ib);
- token_string ib in
- if not (compatible_format_type fmt1 fmt) then
- format_mismatch fmt1 fmt else
- f (string_to_format fmt1)
-;;
+ type_format fmt (erase_rel fmtty),
+ type_format fmt' (erase_rel (symm fmtty))
+ with Failure msg -> bad_input msg
+ in
+ Cons (Format (fmt, s),
+ make_scanf ib (concat_fmt fmt' rest) readers)
+
+ | Scan_char_set (width_opt, char_set, Formatting_lit (fmting_lit, rest)) ->
+ let stp, str = stopper_of_formatting_lit fmting_lit in
+ let width = width_of_pad_opt width_opt in
+ let _ = scan_chars_in_char_set char_set (Some stp) width ib in
+ let s = token_string ib in
+ let str_rest = String_literal (str, rest) in
+ Cons (s, make_scanf ib str_rest readers)
+ | Scan_char_set (width_opt, char_set, rest) ->
+ let width = width_of_pad_opt width_opt in
+ let _ = scan_chars_in_char_set char_set None width ib in
+ let s = token_string ib in
+ Cons (s, make_scanf ib rest readers)
+ | Scan_get_counter (counter, rest) ->
+ let count = get_counter ib counter in
+ Cons (count, make_scanf ib rest readers)
+
+ | Formatting_lit (formatting_lit, rest) ->
+ String.iter (check_char ib) (string_of_formatting_lit formatting_lit);
+ make_scanf ib rest readers
+ | Formatting_gen (Open_tag (Format (fmt', _)), rest) ->
+ check_char ib '@'; check_char ib '{';
+ make_scanf ib (concat_fmt fmt' rest) readers
+ | Formatting_gen (Open_box (Format (fmt', _)), rest) ->
+ check_char ib '@'; check_char ib '[';
+ make_scanf ib (concat_fmt fmt' rest) readers
+
+ | Ignored_param (ign, rest) ->
+ let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in
+ begin match make_scanf ib fmt' readers with
+ | Cons (_, arg_rest) -> arg_rest
+ | Nil -> assert false
+ end
-let sscanf_format s fmt = bscanf_format (Scanning.from_string s) fmt;;
+ | End_of_format ->
+ Nil
+
+(* Case analysis on padding and precision. *)
+(* Reject formats containing "%*" or "%.*". *)
+(* Pass padding and precision to the generic scanner `scan'. *)
+and pad_prec_scanf : type a c d e f x y z t .
+ Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt ->
+ (d, _) heter_list -> (x, y) padding -> (y, z -> a) precision ->
+ (int -> int -> Scanning.in_channel -> t) ->
+ (Scanning.in_channel -> z) ->
+ (x, f) heter_list =
+fun ib fmt readers pad prec scan token -> match pad, prec with
+ | No_padding, No_precision ->
+ let _ = scan max_int max_int ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | No_padding, Lit_precision p ->
+ let _ = scan max_int p ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | Lit_padding ((Right | Zeros), w), No_precision ->
+ let _ = scan w max_int ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | Lit_padding ((Right | Zeros), w), Lit_precision p ->
+ let _ = scan w p ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | Lit_padding (Left, _), _ ->
+ invalid_arg "scanf: bad conversion \"%-\""
+ | Lit_padding ((Right | Zeros), _), Arg_precision ->
+ invalid_arg "scanf: bad conversion \"%*\""
+ | Arg_padding _, _ ->
+ invalid_arg "scanf: bad conversion \"%*\""
+ | No_padding, Arg_precision ->
+ invalid_arg "scanf: bad conversion \"%*\""
+
+(******************************************************************************)
+ (* Defining [scanf] and various flavors of [scanf] *)
+
+type 'a kscanf_result = Args of 'a | Exc of exn
+
+let kscanf ib ef (Format (fmt, str)) =
+ let rec apply : type a b . a -> (a, b) heter_list -> b =
+ fun f args -> match args with
+ | Cons (x, r) -> apply (f x) r
+ | Nil -> f
+ in
+ let k readers f =
+ Scanning.reset_token ib;
+ match try Args (make_scanf ib fmt readers) with
+ | (Scan_failure _ | Failure _ | End_of_file) as exc -> Exc exc
+ | Invalid_argument msg ->
+ invalid_arg (msg ^ " in format \"" ^ String.escaped str ^ "\"")
+ with
+ | Args args -> apply f args
+ | Exc exc -> ef ib exc
+ in
+ take_format_readers k fmt
+
+let kbscanf = kscanf
+
+(***)
+
+let ksscanf s ef fmt = kbscanf (Scanning.from_string s) ef fmt
+let kfscanf ic ef fmt = kbscanf (Scanning.from_channel ic) ef fmt
+let bscanf ib fmt = kscanf ib scanf_bad_input fmt
+let fscanf ic fmt = kscanf (Scanning.from_channel ic) scanf_bad_input fmt
+let sscanf s fmt = kscanf (Scanning.from_string s) scanf_bad_input fmt
+let scanf fmt = kscanf Scanning.stdib scanf_bad_input fmt
+
+(***)
+
+let bscanf_format : Scanning.in_channel -> ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
+ (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g =
+ fun ib format f ->
+ let _ = scan_caml_string max_int ib in
+ let str = token_string ib in
+ let fmt' =
+ try format_of_string_format str format
+ with Failure msg -> bad_input msg
+ in
+ f fmt'
+
+let sscanf_format : string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
+ (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g =
+ fun s format f -> bscanf_format (Scanning.from_string s) format f
let string_to_String s =
let l = String.length s in
@@ -1566,9 +1352,3 @@ let format_from_string s fmt =
let unescaped s =
sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x)
;;
-
-(*
- Local Variables:
- compile-command: "cd ..; make world"
- End:
-*)
diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli
index a1b3d1acb..297d6f2d5 100644
--- a/stdlib/scanf.mli
+++ b/stdlib/scanf.mli
@@ -485,6 +485,16 @@ val kscanf :
exception that aborted the scanning process as arguments.
*)
+val ksscanf :
+ string -> (Scanning.in_channel -> exn -> 'd) ->
+ ('a, 'b, 'c, 'd) scanner
+(** Same as {!Scanf.kscanf} but reads from the given string. *)
+
+val kfscanf :
+ Pervasives.in_channel -> (Scanning.in_channel -> exn -> 'd) ->
+ ('a, 'b, 'c, 'd) scanner
+(** Same as {!Scanf.kscanf}, but reads from the given regular input channel. *)
+
(** {6 Reading format strings from input} *)
val bscanf_format :
diff --git a/stdlib/set.ml b/stdlib/set.ml
index 1d049b139..5c6212d88 100644
--- a/stdlib/set.ml
+++ b/stdlib/set.ml
@@ -363,7 +363,8 @@ module Make(Ord: OrderedType) =
| 0, l -> Empty, l
| 1, x0 :: l -> Node (Empty, x0, Empty, 1), l
| 2, x0 :: x1 :: l -> Node (Node(Empty, x0, Empty, 1), x1, Empty, 2), l
- | 3, x0 :: x1 :: x2 :: l -> Node (Node(Empty, x0, Empty, 1), x1, Node(Empty, x2, Empty, 1), 2), l
+ | 3, x0 :: x1 :: x2 :: l ->
+ Node (Node(Empty, x0, Empty, 1), x1, Node(Empty, x2, Empty, 1), 2),l
| n, l ->
let nl = n / 2 in
let left, l = sub nl l in
diff --git a/stdlib/set.mli b/stdlib/set.mli
index 556ee9388..1b67398ee 100644
--- a/stdlib/set.mli
+++ b/stdlib/set.mli
@@ -19,6 +19,27 @@
The implementation uses balanced binary trees, and is therefore
reasonably efficient: insertion and membership take time
logarithmic in the size of the set, for instance.
+
+ The [Make] functor constructs implementations for any type, given a
+ [compare] function.
+ For instance:
+ {[
+ module IntPairs =
+ struct
+ type t = int * int
+ let compare (x0,y0) (x1,y1) =
+ match Pervasives.compare x0 x1 with
+ 0 -> Pervasives.compare y0 y1
+ | c -> c
+ end
+
+ module PairsSet = Set.Make(IntPairs)
+
+ let m = PairsSet.(empty |> add (2,3) |> add (5,7) |> add (11,13))
+ ]}
+
+ This creates a new module [PairsSet], with a new type [PairsSet.t]
+ of sets of [int * int].
*)
module type OrderedType =
diff --git a/stdlib/sort.mli b/stdlib/sort.mli
index d5abb79fa..a9be27e13 100644
--- a/stdlib/sort.mli
+++ b/stdlib/sort.mli
@@ -20,11 +20,13 @@
*)
val list : ('a -> 'a -> bool) -> 'a list -> 'a list
+ [@@ocaml.deprecated]
(** Sort a list in increasing order according to an ordering predicate.
The predicate should return [true] if its first argument is
less than or equal to its second argument. *)
val array : ('a -> 'a -> bool) -> 'a array -> unit
+ [@@ocaml.deprecated]
(** Sort an array in increasing order according to an
ordering predicate.
The predicate should return [true] if its first argument is
@@ -32,6 +34,7 @@ val array : ('a -> 'a -> bool) -> 'a array -> unit
The array is sorted in place. *)
val merge : ('a -> 'a -> bool) -> 'a list -> 'a list -> 'a list
+ [@@ocaml.deprecated]
(** Merge two lists according to the given predicate.
Assuming the two argument lists are sorted according to the
predicate, [merge] returns a sorted list containing the elements
diff --git a/stdlib/stdLabels.ml b/stdlib/stdLabels.ml
index 35b25e0b7..60713ca0c 100644
--- a/stdlib/stdLabels.ml
+++ b/stdlib/stdLabels.ml
@@ -18,3 +18,5 @@ module Array = ArrayLabels
module List = ListLabels
module String = StringLabels
+
+module Bytes = BytesLabels
diff --git a/stdlib/stdLabels.mli b/stdlib/stdLabels.mli
index 144936f17..c607a9987 100644
--- a/stdlib/stdLabels.mli
+++ b/stdlib/stdLabels.mli
@@ -14,12 +14,14 @@
(** Standard labeled libraries.
This meta-module provides labelized version of the {!Array},
- {!List} and {!String} modules.
+ {!Bytes}, {!List} and {!String} modules.
They only differ by their labels. Detailed interfaces can be found
- in [arrayLabels.mli], [listLabels.mli] and [stringLabels.mli].
+ in [arrayLabels.mli], [bytesLabels.mli], [listLabels.mli]
+ and [stringLabels.mli].
*)
module Array = ArrayLabels
+module Bytes = BytesLabels
module List = ListLabels
module String = StringLabels
diff --git a/stdlib/stream.ml b/stdlib/stream.ml
index 753bce005..751c741a8 100644
--- a/stdlib/stream.ml
+++ b/stdlib/stream.ml
@@ -25,7 +25,7 @@ and 'a data =
| Sbuffio of buffio
and 'a gen = { mutable curr : 'a option option; func : int -> 'a option }
and buffio =
- { ic : in_channel; buff : string; mutable len : int; mutable ind : int }
+ { ic : in_channel; buff : bytes; mutable len : int; mutable ind : int }
;;
exception Failure;;
exception Error of string;;
@@ -37,7 +37,7 @@ let set_data (s : 'a t) (d : 'a data) =
;;
let fill_buff b =
- b.len <- input b.ic b.buff 0 (String.length b.buff); b.ind <- 0
+ b.len <- input b.ic b.buff 0 (Bytes.length b.buff); b.ind <- 0
;;
let rec get_data count d = match d with
@@ -64,7 +64,7 @@ let rec get_data count d = match d with
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
if b.len == 0 then Sempty else
- let r = Obj.magic (String.unsafe_get b.buff b.ind) in
+ let r = Obj.magic (Bytes.unsafe_get b.buff b.ind) in
(* Warning: anyone using g thinks that an item has been read *)
b.ind <- succ b.ind; Scons(r, d)
| Slazy f -> get_data count (Lazy.force f)
@@ -87,7 +87,7 @@ let rec peek s =
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
if b.len == 0 then begin set_data s Sempty; None end
- else Some (Obj.magic (String.unsafe_get b.buff b.ind))
+ else Some (Obj.magic (Bytes.unsafe_get b.buff b.ind))
;;
let rec junk s =
@@ -159,9 +159,18 @@ let of_string s =
else None)
;;
+let of_bytes s =
+ let count = ref 0 in
+ from (fun _ ->
+ let c = !count in
+ if c < Bytes.length s
+ then (incr count; Some (Bytes.get s c))
+ else None)
+;;
+
let of_channel ic =
{count = 0;
- data = Sbuffio {ic = ic; buff = String.create 4096; len = 0; ind = 0}}
+ data = Sbuffio {ic = ic; buff = Bytes.create 4096; len = 0; ind = 0}}
;;
(* Stream expressions builders *)
diff --git a/stdlib/stream.mli b/stdlib/stream.mli
index aeb0da1e8..85a846102 100644
--- a/stdlib/stream.mli
+++ b/stdlib/stream.mli
@@ -46,6 +46,9 @@ val of_list : 'a list -> 'a t
val of_string : string -> char t
(** Return the stream of the characters of the string parameter. *)
+val of_bytes : bytes -> char t
+(** Return the stream of the characters of the bytes parameter. *)
+
val of_channel : in_channel -> char t
(** Return the stream of the characters read from the input channel. *)
diff --git a/stdlib/string.ml b/stdlib/string.ml
index fda40b527..93880af26 100644
--- a/stdlib/string.ml
+++ b/stdlib/string.ml
@@ -2,72 +2,53 @@
(* *)
(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Damien Doligez, projet Gallium, INRIA Rocquencourt *)
(* *)
-(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the GNU Library General Public License, with *)
(* the special exception on linking described in file ../LICENSE. *)
(* *)
(***********************************************************************)
-(* String operations *)
+(* String operations, based on byte sequence operations *)
external length : string -> int = "%string_length"
external get : string -> int -> char = "%string_safe_get"
-external set : string -> int -> char -> unit = "%string_safe_set"
-external create : int -> string = "caml_create_string"
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+external create : int -> bytes = "caml_create_string"
external unsafe_get : string -> int -> char = "%string_unsafe_get"
-external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set"
-external unsafe_blit : string -> int -> string -> int -> int -> unit
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_blit : string -> int -> bytes -> int -> int -> unit
= "caml_blit_string" "noalloc"
-external unsafe_fill : string -> int -> int -> char -> unit
+external unsafe_fill : bytes -> int -> int -> char -> unit
= "caml_fill_string" "noalloc"
-let make n c =
- let s = create n in
- unsafe_fill s 0 n c;
- s
+module B = Bytes
-let copy s =
- let len = length s in
- let r = create len in
- unsafe_blit s 0 r 0 len;
- r
+let bts = B.unsafe_to_string
+let bos = B.unsafe_of_string
+let make n c =
+ B.make n c |> bts
+let init n f =
+ B.init n f |> bts
+let copy s =
+ B.copy (bos s) |> bts
let sub s ofs len =
- if ofs < 0 || len < 0 || ofs > length s - len
- then invalid_arg "String.sub"
- else begin
- let r = create len in
- unsafe_blit s ofs r 0 len;
- r
- end
-
-let fill s ofs len c =
- if ofs < 0 || len < 0 || ofs > length s - len
- then invalid_arg "String.fill"
- else unsafe_fill s ofs len c
-
-let blit s1 ofs1 s2 ofs2 len =
- if len < 0 || ofs1 < 0 || ofs1 > length s1 - len
- || ofs2 < 0 || ofs2 > length s2 - len
- then invalid_arg "String.blit"
- else unsafe_blit s1 ofs1 s2 ofs2 len
-
-let iter f a =
- for i = 0 to length a - 1 do f(unsafe_get a i) done
-
-let iteri f a =
- for i = 0 to length a - 1 do f i (unsafe_get a i) done
+ B.sub (bos s) ofs len |> bts
+let fill =
+ B.fill
+let blit =
+ B.blit_string
let concat sep l =
match l with
- [] -> ""
+ | [] -> ""
| hd :: tl ->
let num = ref 0 and len = ref 0 in
List.iter (fun s -> incr num; len := !len + length s) l;
- let r = create (!len + length sep * (!num - 1)) in
+ let r = B.create (!len + length sep * (!num - 1)) in
unsafe_blit hd 0 r 0 (length hd);
let pos = ref(length hd) in
List.iter
@@ -77,128 +58,68 @@ let concat sep l =
unsafe_blit s 0 r !pos (length s);
pos := !pos + length s)
tl;
- r
+ Bytes.unsafe_to_string r
+
+let iter f s =
+ B.iter f (bos s)
+let iteri f s =
+ B.iteri f (bos s)
+let map f s =
+ B.map f (bos s) |> bts
+let mapi f s =
+ B.mapi f (bos s) |> bts
+
+(* Beware: we cannot use B.trim or B.escape because they always make a
+ copy, but String.mli spells out some cases where we are not allowed
+ to make a copy. *)
external is_printable: char -> bool = "caml_is_printable"
-external char_code: char -> int = "%identity"
-external char_chr: int -> char = "%identity"
let is_space = function
| ' ' | '\012' | '\n' | '\r' | '\t' -> true
| _ -> false
let trim s =
- let len = length s in
- let i = ref 0 in
- while !i < len && is_space (unsafe_get s !i) do
- incr i
- done;
- let j = ref (len - 1) in
- while !j >= !i && is_space (unsafe_get s !j) do
- decr j
- done;
- if !i = 0 && !j = len - 1 then
- s
- else if !j >= !i then
- sub s !i (!j - !i + 1)
- else
- ""
+ if s = "" then s
+ else if is_space (unsafe_get s 0) || is_space (unsafe_get s (length s - 1))
+ then bts (B.trim (bos s))
+ else s
let escaped s =
- let n = ref 0 in
- for i = 0 to length s - 1 do
- n := !n +
- (match unsafe_get s i with
- | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
- | c -> if is_printable c then 1 else 4)
- done;
- if !n = length s then s else begin
- let s' = create !n in
- n := 0;
- for i = 0 to length s - 1 do
- begin
- match unsafe_get s i with
- | ('"' | '\\') as c ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c
- | '\n' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n'
- | '\t' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't'
- | '\r' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'r'
- | '\b' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'b'
- | c ->
- if is_printable c then
- unsafe_set s' !n c
- else begin
- let a = char_code c in
- unsafe_set s' !n '\\';
- incr n;
- unsafe_set s' !n (char_chr (48 + a / 100));
- incr n;
- unsafe_set s' !n (char_chr (48 + (a / 10) mod 10));
- incr n;
- unsafe_set s' !n (char_chr (48 + a mod 10))
- end
- end;
- incr n
- done;
- s'
- end
-
-let map f s =
- let l = length s in
- if l = 0 then s else begin
- let r = create l in
- for i = 0 to l - 1 do unsafe_set r i (f(unsafe_get s i)) done;
- r
- end
-
-let uppercase s = map Char.uppercase s
-let lowercase s = map Char.lowercase s
-
-let apply1 f s =
- if length s = 0 then s else begin
- let r = copy s in
- unsafe_set r 0 (f(unsafe_get s 0));
- r
- end
-
-let capitalize s = apply1 Char.uppercase s
-let uncapitalize s = apply1 Char.lowercase s
-
-let rec index_rec s lim i c =
- if i >= lim then raise Not_found else
- if unsafe_get s i = c then i else index_rec s lim (i + 1) c;;
-
-let index s c = index_rec s (length s) 0 c;;
-
-let index_from s i c =
- let l = length s in
- if i < 0 || i > l then invalid_arg "String.index_from" else
- index_rec s l i c;;
-
-let rec rindex_rec s i c =
- if i < 0 then raise Not_found else
- if unsafe_get s i = c then i else rindex_rec s (i - 1) c;;
-
-let rindex s c = rindex_rec s (length s - 1) c;;
+ let rec needs_escape i =
+ if i >= length s then false else
+ match unsafe_get s i with
+ | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> true
+ | c when is_printable c -> needs_escape (i+1)
+ | _ -> true
+ in
+ if needs_escape 0 then
+ bts (B.escaped (bos s))
+ else
+ s
+let index s c =
+ B.index (bos s) c
+let rindex s c =
+ B.rindex (bos s) c
+let index_from s i c=
+ B.index_from (bos s) i c
let rindex_from s i c =
- if i < -1 || i >= length s then invalid_arg "String.rindex_from" else
- rindex_rec s i c;;
-
+ B.rindex_from (bos s) i c
+let contains s c =
+ B.contains (bos s) c
let contains_from s i c =
- let l = length s in
- if i < 0 || i > l then invalid_arg "String.contains_from" else
- try ignore (index_rec s l i c); true with Not_found -> false;;
-
-let contains s c = contains_from s 0 c;;
-
+ B.contains_from (bos s) i c
let rcontains_from s i c =
- if i < 0 || i >= length s then invalid_arg "String.rcontains_from" else
- try ignore (rindex_rec s i c); true with Not_found -> false;;
+ B.rcontains_from (bos s) i c
+let uppercase s =
+ B.uppercase (bos s) |> bts
+let lowercase s =
+ B.lowercase (bos s) |> bts
+let capitalize s =
+ B.capitalize (bos s) |> bts
+let uncapitalize s =
+ B.uncapitalize (bos s) |> bts
type t = string
diff --git a/stdlib/string.mli b/stdlib/string.mli
index 14f2c82db..8f1e178b5 100644
--- a/stdlib/string.mli
+++ b/stdlib/string.mli
@@ -13,41 +13,36 @@
(** String operations.
- Given a string [s] of length [l], we call character number in [s]
- the index of a character in [s]. Indexes start at [0], and we will
- call a character number valid in [s] if it falls within the range
- [[0...l-1]]. A position is the point between two characters or at
- the beginning or end of the string. We call a position valid
- in [s] if it falls within the range [[0...l]]. Note that character
- number [n] is between positions [n] and [n+1].
+ A string is an immutable data structure that contains a
+ fixed-length sequence of (single-byte) characters. Each character
+ can be accessed in constant time through its index.
+
+ Given a string [s] of length [l], we can access each of the [l]
+ characters of [s] via its index in the sequence. Indexes start at
+ [0], and we will call an index valid in [s] if it falls within the
+ range [[0...l-1]] (inclusive). A position is the point between two
+ characters or at the beginning or end of the string. We call a
+ position valid in [s] if it falls within the range [[0...l]]
+ (inclusive). Note that the character at index [n] is between
+ positions [n] and [n+1].
Two parameters [start] and [len] are said to designate a valid
substring of [s] if [len >= 0] and [start] and [start+len] are
valid positions in [s].
- OCaml strings can be modified in place, for instance via the
- {!String.set} and {!String.blit} functions described below. This
- possibility should be used rarely and with much care, however, since
- both the OCaml compiler and most OCaml libraries share strings as if
- they were immutable, rather than copying them. In particular,
- string literals are shared: a single copy of the string is created
- at program loading time and returned by all evaluations of the
- string literal. Consider for example:
-
- {[
- # let f () = "foo";;
- val f : unit -> string = <fun>
- # (f ()).[0] <- 'b';;
- - : unit = ()
- # f ();;
- - : string = "boo"
- ]}
-
- Likewise, many functions from the standard library can return string
- literals or one of their string arguments. Therefore, the returned strings
- must not be modified directly. If mutation is absolutely necessary,
- it should be performed on a fresh copy of the string, as produced by
- {!String.copy}.
+ OCaml strings used to be modifiable in place, for instance via the
+ {!String.set} and {!String.blit} functions described below. This
+ usage is deprecated and only possible when the compiler is put in
+ "unsafe-string" mode by giving the [-unsafe-string] command-line
+ option (which is currently the default for reasons of backward
+ compatibility). This is done by making the types [string] and
+ [bytes] (see module {!Bytes}) interchangeable so that functions
+ expecting byte sequences can also accept strings as arguments and
+ modify them.
+
+ All new code should avoid this feature and be compiled with the
+ [-safe-string] command-line option to enforce the separation between
+ the types [string] and [bytes].
*)
@@ -55,33 +50,51 @@ external length : string -> int = "%string_length"
(** Return the length (number of characters) of the given string. *)
external get : string -> int -> char = "%string_safe_get"
-(** [String.get s n] returns character number [n] in string [s].
+(** [String.get s n] returns the character at index [n] in string [s].
You can also write [s.[n]] instead of [String.get s n].
- Raise [Invalid_argument] if [n] not a valid character number in [s]. *)
+ Raise [Invalid_argument] if [n] not a valid index in [s]. *)
-external set : string -> int -> char -> unit = "%string_safe_set"
-(** [String.set s n c] modifies string [s] in place,
- replacing the character number [n] by [c].
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+ [@@ocaml.deprecated]
+(** [String.set s n c] modifies byte sequence [s] in place,
+ replacing the byte at index [n] with [c].
You can also write [s.[n] <- c] instead of [String.set s n c].
- Raise [Invalid_argument] if [n] is not a valid character number in [s]. *)
+ Raise [Invalid_argument] if [n] is not a valid index in [s].
-external create : int -> string = "caml_create_string"
-(** [String.create n] returns a fresh string of length [n].
- The string initially contains arbitrary characters.
+ @deprecated This is a deprecated alias of {!Bytes.set}.[ ] *)
- Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+external create : int -> bytes = "caml_create_string" [@@ocaml.deprecated]
+(** [String.create n] returns a fresh byte sequence of length [n].
+ The sequence is uninitialized and contains arbitrary bytes.
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}.
+
+ @deprecated This is a deprecated alias of {!Bytes.create}.[ ] *)
val make : int -> char -> string
(** [String.make n c] returns a fresh string of length [n],
filled with the character [c].
- Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}.*)
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val init : int -> (int -> char) -> string
+(** [String.init n f] returns a string of length [n], with character
+ [i] initialized to the result of [f i] (called in increasing
+ index order).
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}.
+
+ @since 4.02.0
+*)
-val copy : string -> string
-(** Return a copy of the given string. *)
+val copy : string -> string [@@ocaml.deprecated]
+(** Return a copy of the given string.
+
+ @deprecated Because strings are immutable, it doesn't make much
+ sense to make identical copies of them. *)
val sub : string -> int -> int -> string
(** [String.sub s start len] returns a fresh string of length [len],
@@ -91,27 +104,24 @@ val sub : string -> int -> int -> string
Raise [Invalid_argument] if [start] and [len] do not
designate a valid substring of [s]. *)
-val fill : string -> int -> int -> char -> unit
-(** [String.fill s start len c] modifies string [s] in place,
- replacing [len] characters by [c], starting at [start].
+val fill : bytes -> int -> int -> char -> unit [@@ocaml.deprecated]
+(** [String.fill s start len c] modifies byte sequence [s] in place,
+ replacing [len] bytes with [c], starting at [start].
Raise [Invalid_argument] if [start] and [len] do not
- designate a valid substring of [s]. *)
+ designate a valid range of [s].
-val blit : string -> int -> string -> int -> int -> unit
-(** [String.blit src srcoff dst dstoff len] copies [len] characters
- from string [src], starting at character number [srcoff], to
- string [dst], starting at character number [dstoff]. It works
- correctly even if [src] and [dst] are the same string,
- and the source and destination intervals overlap.
+ @deprecated This is a deprecated alias of {!Bytes.fill}.[ ] *)
- Raise [Invalid_argument] if [srcoff] and [len] do not
- designate a valid substring of [src], or if [dstoff] and [len]
- do not designate a valid substring of [dst]. *)
+val blit : string -> int -> bytes -> int -> int -> unit
+(** Same as {!Bytes.blit_string}. *)
val concat : string -> string list -> string
(** [String.concat sep sl] concatenates the list of strings [sl],
- inserting the separator string [sep] between each. *)
+ inserting the separator string [sep] between each.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
val iter : (char -> unit) -> string -> unit
(** [String.iter f s] applies function [f] in turn to all
@@ -122,19 +132,24 @@ val iteri : (int -> char -> unit) -> string -> unit
(** Same as {!String.iter}, but the
function is applied to the index of the element as first argument
(counting from 0), and the character itself as second argument.
- @since 4.00.0
-*)
+ @since 4.00.0 *)
val map : (char -> char) -> string -> string
-(** [String.map f s] applies function [f] in turn to all
- the characters of [s] and stores the results in a new string that
- is returned.
- @since 4.00.0 *)
+(** [String.map f s] applies function [f] in turn to all the
+ characters of [s] (in increasing index order) and stores the
+ results in a new string that is returned.
+ @since 4.00.0 *)
+
+val mapi : (int -> char -> char) -> string -> string
+(** [String.mapi f s] calls [f] with each character of [s] and its
+ index (in increasing index order) and stores the results in a new
+ string that is returned.
+ @since 4.02.0 *)
val trim : string -> string
(** Return a copy of the argument, without leading and trailing
whitespace. The characters regarded as whitespace are: [' '],
- ['\012'], ['\n'], ['\r'], and ['\t']. If there is no leading nor
+ ['\012'], ['\n'], ['\r'], and ['\t']. If there is neither leading nor
trailing whitespace character in the argument, return the original
string itself, not a copy.
@since 4.00.0 *)
@@ -144,22 +159,25 @@ val escaped : string -> string
represented by escape sequences, following the lexical
conventions of OCaml. If there is no special
character in the argument, return the original string itself,
- not a copy. Its inverse function is Scanf.unescaped. *)
+ not a copy. Its inverse function is Scanf.unescaped.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
val index : string -> char -> int
-(** [String.index s c] returns the character number of the first
+(** [String.index s c] returns the index of the first
occurrence of character [c] in string [s].
Raise [Not_found] if [c] does not occur in [s]. *)
val rindex : string -> char -> int
-(** [String.rindex s c] returns the character number of the last
+(** [String.rindex s c] returns the index of the last
occurrence of character [c] in string [s].
Raise [Not_found] if [c] does not occur in [s]. *)
val index_from : string -> int -> char -> int
-(** [String.index_from s i c] returns the character number of the
+(** [String.index_from s i c] returns the index of the
first occurrence of character [c] in string [s] after position [i].
[String.index s c] is equivalent to [String.index_from s 0 c].
@@ -167,7 +185,7 @@ val index_from : string -> int -> char -> int
Raise [Not_found] if [c] does not occur in [s] after position [i]. *)
val rindex_from : string -> int -> char -> int
-(** [String.rindex_from s i c] returns the character number of the
+(** [String.rindex_from s i c] returns the index of the
last occurrence of character [c] in string [s] before position [i+1].
[String.rindex s c] is equivalent to
[String.rindex_from s (String.length s - 1) c].
@@ -224,8 +242,11 @@ val compare: t -> t -> int
(* The following is for system use only. Do not call directly. *)
external unsafe_get : string -> int -> char = "%string_unsafe_get"
-external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+ [@@ocaml.deprecated]
external unsafe_blit :
- string -> int -> string -> int -> int -> unit = "caml_blit_string" "noalloc"
+ string -> int -> bytes -> int -> int -> unit
+ = "caml_blit_string" "noalloc"
external unsafe_fill :
- string -> int -> int -> char -> unit = "caml_fill_string" "noalloc"
+ bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc"
+ [@@ocaml.deprecated]
diff --git a/stdlib/stringLabels.mli b/stdlib/stringLabels.mli
index 8e2e6d379..1cf5d51ed 100644
--- a/stdlib/stringLabels.mli
+++ b/stdlib/stringLabels.mli
@@ -17,61 +17,71 @@ external length : string -> int = "%string_length"
(** Return the length (number of characters) of the given string. *)
external get : string -> int -> char = "%string_safe_get"
-(** [String.get s n] returns character number [n] in string [s].
- The first character is character number 0.
- The last character is character number [String.length s - 1].
+(** [String.get s n] returns the character at index [n] in string [s].
You can also write [s.[n]] instead of [String.get s n].
- Raise [Invalid_argument "index out of bounds"]
- if [n] is outside the range 0 to [(String.length s - 1)]. *)
+ Raise [Invalid_argument] if [n] not a valid index in [s]. *)
-
-external set : string -> int -> char -> unit = "%string_safe_set"
-(** [String.set s n c] modifies string [s] in place,
- replacing the character number [n] by [c].
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+ [@@ocaml.deprecated]
+(** [String.set s n c] modifies byte sequence [s] in place,
+ replacing the byte at index [n] with [c].
You can also write [s.[n] <- c] instead of [String.set s n c].
- Raise [Invalid_argument "index out of bounds"]
- if [n] is outside the range 0 to [(String.length s - 1)]. *)
-external create : int -> string = "caml_create_string"
-(** [String.create n] returns a fresh string of length [n].
- The string initially contains arbitrary characters.
- Raise [Invalid_argument] if [n < 0] or [n > Sys.max_string_length].
-*)
+ Raise [Invalid_argument] if [n] is not a valid index in [s].
+
+ @deprecated This is a deprecated alias of {!Bytes.set}. *)
+
+external create : int -> bytes = "caml_create_string" [@@ocaml.deprecated]
+(** [String.create n] returns a fresh byte sequence of length [n].
+ The sequence is uninitialized and contains arbitrary bytes.
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}.
+
+ @deprecated This is a deprecated alias of {!Bytes.create}. *)
val make : int -> char -> string
(** [String.make n c] returns a fresh string of length [n],
filled with the character [c].
- Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}.*)
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val init : int -> f:(int -> char) -> string
+(** [init n f] returns a string of length [n],
+ with character [i] initialized to the result of [f i].
+
+ Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
val copy : string -> string
(** Return a copy of the given string. *)
val sub : string -> pos:int -> len:int -> string
(** [String.sub s start len] returns a fresh string of length [len],
- containing the characters number [start] to [start + len - 1]
- of string [s].
- Raise [Invalid_argument] if [start] and [len] do not
- designate a valid substring of [s]; that is, if [start < 0],
- or [len < 0], or [start + len > ]{!StringLabels.length}[ s]. *)
+ containing the substring of [s] that starts at position [start] and
+ has length [len].
-val fill : string -> pos:int -> len:int -> char -> unit
-(** [String.fill s start len c] modifies string [s] in place,
- replacing the characters number [start] to [start + len - 1]
- by [c].
Raise [Invalid_argument] if [start] and [len] do not
designate a valid substring of [s]. *)
+val fill : bytes -> pos:int -> len:int -> char -> unit [@@ocaml.deprecated]
+(** [String.fill s start len c] modifies byte sequence [s] in place,
+ replacing [len] bytes by [c], starting at [start].
+
+ Raise [Invalid_argument] if [start] and [len] do not
+ designate a valid substring of [s].
+
+ @deprecated This is a deprecated alias of {!Bytes.fill}. *)
+
val blit :
- src:string -> src_pos:int -> dst:string -> dst_pos:int -> len:int -> unit
-(** [String.blit src srcoff dst dstoff len] copies [len] characters
- from string [src], starting at character number [srcoff], to
- string [dst], starting at character number [dstoff]. It works
- correctly even if [src] and [dst] are the same string,
- and the source and destination chunks overlap.
+ src:string -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int
+ -> unit
+(** [String.blit src srcoff dst dstoff len] copies [len] bytes
+ from the string [src], starting at index [srcoff],
+ to byte sequence [dst], starting at character number [dstoff].
+
Raise [Invalid_argument] if [srcoff] and [len] do not
- designate a valid substring of [src], or if [dstoff] and [len]
- do not designate a valid substring of [dst]. *)
+ designate a valid range of [src], or if [dstoff] and [len]
+ do not designate a valid range of [dst]. *)
val concat : sep:string -> string list -> string
(** [String.concat sep sl] concatenates the list of strings [sl],
@@ -86,8 +96,7 @@ val iteri : f:(int -> char -> unit) -> string -> unit
(** Same as {!String.iter}, but the
function is applied to the index of the element as first argument
(counting from 0), and the character itself as second argument.
- @since 4.00.0
-*)
+ @since 4.00.0 *)
val map : f:(char -> char) -> string -> string
(** [String.map f s] applies function [f] in turn to all
@@ -95,11 +104,18 @@ val map : f:(char -> char) -> string -> string
is returned.
@since 4.00.0 *)
+val mapi : f:(int -> char -> char) -> string -> string
+(** [String.mapi f s] calls [f] with each character of [s] and its
+ index (in increasing index order) and stores the results in a new
+ string that is returned.
+ @since 4.02.0 *)
+
val trim : string -> string
-(** Return a copy of the argument, without leading and trailing whitespace.
- The characters regarded as whitespace are: [' '], ['\012'], ['\n'],
- ['\r'], and ['\t']. If there is no whitespace character in the argument,
- return the original string itself, not a copy.
+(** Return a copy of the argument, without leading and trailing
+ whitespace. The characters regarded as whitespace are: [' '],
+ ['\012'], ['\n'], ['\r'], and ['\t']. If there is no leading nor
+ trailing whitespace character in the argument, return the original
+ string itself, not a copy.
@since 4.00.0 *)
val escaped : string -> string
@@ -107,28 +123,36 @@ val escaped : string -> string
represented by escape sequences, following the lexical
conventions of OCaml. If there is no special
character in the argument, return the original string itself,
- not a copy. *)
+ not a copy. Its inverse function is Scanf.unescaped. *)
val index : string -> char -> int
-(** [String.index s c] returns the position of the leftmost
+(** [String.index s c] returns the index of the first
occurrence of character [c] in string [s].
+
Raise [Not_found] if [c] does not occur in [s]. *)
val rindex : string -> char -> int
-(** [String.rindex s c] returns the position of the rightmost
+(** [String.rindex s c] returns the index of the last
occurrence of character [c] in string [s].
+
Raise [Not_found] if [c] does not occur in [s]. *)
val index_from : string -> int -> char -> int
-(** Same as {!StringLabels.index}, but start
- searching at the character position given as second argument.
- [String.index s c] is equivalent to [String.index_from s 0 c].*)
+(** [String.index_from s i c] returns the index of the
+ first occurrence of character [c] in string [s] after position [i].
+ [String.index s c] is equivalent to [String.index_from s 0 c].
+
+ Raise [Invalid_argument] if [i] is not a valid position in [s].
+ Raise [Not_found] if [c] does not occur in [s] after position [i]. *)
val rindex_from : string -> int -> char -> int
-(** Same as {!StringLabels.rindex}, but start
- searching at the character position given as second argument.
+(** [String.rindex_from s i c] returns the index of the
+ last occurrence of character [c] in string [s] before position [i+1].
[String.rindex s c] is equivalent to
- [String.rindex_from s (String.length s - 1) c]. *)
+ [String.rindex_from s (String.length s - 1) c].
+
+ Raise [Invalid_argument] if [i+1] is not a valid position in [s].
+ Raise [Not_found] if [c] does not occur in [s] before position [i+1]. *)
val contains : string -> char -> bool
(** [String.contains s c] tests if character [c]
@@ -136,15 +160,18 @@ val contains : string -> char -> bool
val contains_from : string -> int -> char -> bool
(** [String.contains_from s start c] tests if character [c]
- appears in the substring of [s] starting from [start] to the end
- of [s].
- Raise [Invalid_argument] if [start] is not a valid index of [s]. *)
+ appears in [s] after position [start].
+ [String.contains s c] is equivalent to
+ [String.contains_from s 0 c].
+
+ Raise [Invalid_argument] if [start] is not a valid position in [s]. *)
val rcontains_from : string -> int -> char -> bool
(** [String.rcontains_from s stop c] tests if character [c]
- appears in the substring of [s] starting from the beginning
- of [s] to index [stop].
- Raise [Invalid_argument] if [stop] is not a valid index of [s]. *)
+ appears in [s] before position [stop+1].
+
+ Raise [Invalid_argument] if [stop < 0] or [stop+1] is not a valid
+ position in [s]. *)
val uppercase : string -> string
(** Return a copy of the argument, with all lowercase letters
@@ -176,9 +203,11 @@ val compare: t -> t -> int
(* The following is for system use only. Do not call directly. *)
external unsafe_get : string -> int -> char = "%string_unsafe_get"
-external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+ [@@ocaml.deprecated]
external unsafe_blit :
- src:string -> src_pos:int -> dst:string -> dst_pos:int -> len:int ->
+ src:string -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int ->
unit = "caml_blit_string" "noalloc"
external unsafe_fill :
- string -> pos:int -> len:int -> char -> unit = "caml_fill_string" "noalloc"
+ bytes -> pos:int -> len:int -> char -> unit = "caml_fill_string" "noalloc"
+ [@@ocaml.deprecated]
diff --git a/stdlib/sys.mli b/stdlib/sys.mli
index 944b1090f..ae175c2e8 100644
--- a/stdlib/sys.mli
+++ b/stdlib/sys.mli
@@ -11,7 +11,12 @@
(* *)
(***********************************************************************)
-(** System interface. *)
+(** System interface.
+
+ Every function in this module raises [Sys_error] with an
+ informative message when the underlying system call signal
+ an error.
+*)
val argv : string array
(** The command line arguments given to the process.
@@ -99,7 +104,7 @@ val big_endian : bool
@since 4.00.0 *)
val max_string_length : int
-(** Maximum length of a string. *)
+(** Maximum length of strings and byte sequences. *)
val max_array_length : int
(** Maximum length of a normal array. The maximum length of a float
diff --git a/stdlib/weak.ml b/stdlib/weak.ml
index 536a42e04..8166142b6 100644
--- a/stdlib/weak.ml
+++ b/stdlib/weak.ml
@@ -80,8 +80,8 @@ module Make (H : Hashtbl.HashedType) : (S with type data = H.t) = struct
let sz = if sz < 7 then 7 else sz in
let sz = if sz > Sys.max_array_length then Sys.max_array_length else sz in
{
- table = Array.create sz emptybucket;
- hashes = Array.create sz [| |];
+ table = Array.make sz emptybucket;
+ hashes = Array.make sz [| |];
limit = limit;
oversize = 0;
rover = 0;