diff options
Diffstat (limited to 'stdlib')
63 files changed, 6363 insertions, 2470 deletions
diff --git a/stdlib/.depend b/stdlib/.depend index e5b65ad51..96f95082d 100644 --- a/stdlib/.depend +++ b/stdlib/.depend @@ -2,7 +2,11 @@ arg.cmi : array.cmi : arrayLabels.cmi : buffer.cmi : +bytes.cmi : +bytesLabels.cmi : callback.cmi : +camlinternalFormat.cmi : camlinternalFormatBasics.cmi buffer.cmi +camlinternalFormatBasics.cmi : camlinternalLazy.cmi : camlinternalMod.cmi : obj.cmi camlinternalOO.cmi : obj.cmi @@ -27,16 +31,17 @@ nativeint.cmi : obj.cmi : int32.cmi oo.cmi : camlinternalOO.cmi parsing.cmi : obj.cmi lexing.cmi -pervasives.cmi : +pervasives.cmi : camlinternalFormatBasics.cmi printexc.cmi : -printf.cmi : obj.cmi buffer.cmi +printf.cmi : buffer.cmi queue.cmi : random.cmi : nativeint.cmi int64.cmi int32.cmi scanf.cmi : pervasives.cmi set.cmi : sort.cmi : stack.cmi : -stdLabels.cmi : stringLabels.cmi listLabels.cmi arrayLabels.cmi +stdLabels.cmi : stringLabels.cmi listLabels.cmi bytesLabels.cmi \ + arrayLabels.cmi stream.cmi : string.cmi : stringLabels.cmi : @@ -50,10 +55,20 @@ array.cmo : array.cmi array.cmx : array.cmi arrayLabels.cmo : array.cmi arrayLabels.cmi arrayLabels.cmx : array.cmx arrayLabels.cmi -buffer.cmo : sys.cmi string.cmi buffer.cmi -buffer.cmx : sys.cmx string.cmx buffer.cmi +buffer.cmo : sys.cmi string.cmi bytes.cmi buffer.cmi +buffer.cmx : sys.cmx string.cmx bytes.cmx buffer.cmi +bytes.cmo : pervasives.cmi list.cmi char.cmi bytes.cmi +bytes.cmx : pervasives.cmx list.cmx char.cmx bytes.cmi +bytesLabels.cmo : bytes.cmi bytesLabels.cmi +bytesLabels.cmx : bytes.cmx bytesLabels.cmi callback.cmo : obj.cmi callback.cmi callback.cmx : obj.cmx callback.cmi +camlinternalFormat.cmo : sys.cmi string.cmi char.cmi \ + camlinternalFormatBasics.cmi bytes.cmi buffer.cmi camlinternalFormat.cmi +camlinternalFormat.cmx : sys.cmx string.cmx char.cmx \ + camlinternalFormatBasics.cmx bytes.cmx buffer.cmx camlinternalFormat.cmi +camlinternalFormatBasics.cmo : camlinternalFormatBasics.cmi +camlinternalFormatBasics.cmx : camlinternalFormatBasics.cmi camlinternalLazy.cmo : obj.cmi camlinternalLazy.cmi camlinternalLazy.cmx : obj.cmx camlinternalLazy.cmi camlinternalMod.cmo : obj.cmi camlinternalOO.cmi array.cmi \ @@ -68,20 +83,22 @@ char.cmo : char.cmi char.cmx : char.cmi complex.cmo : complex.cmi complex.cmx : complex.cmi -digest.cmo : string.cmi char.cmi digest.cmi -digest.cmx : string.cmx char.cmx digest.cmi +digest.cmo : string.cmi char.cmi bytes.cmi digest.cmi +digest.cmx : string.cmx char.cmx bytes.cmx digest.cmi filename.cmo : sys.cmi string.cmi random.cmi printf.cmi lazy.cmi buffer.cmi \ filename.cmi filename.cmx : sys.cmx string.cmx random.cmx printf.cmx lazy.cmx buffer.cmx \ filename.cmi -format.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \ - buffer.cmi format.cmi -format.cmx : string.cmx printf.cmx pervasives.cmx obj.cmx list.cmx \ - buffer.cmx format.cmi +format.cmo : string.cmi pervasives.cmi camlinternalFormatBasics.cmi \ + camlinternalFormat.cmi buffer.cmi format.cmi +format.cmx : string.cmx pervasives.cmx camlinternalFormatBasics.cmx \ + camlinternalFormat.cmx buffer.cmx format.cmi gc.cmo : sys.cmi printf.cmi gc.cmi gc.cmx : sys.cmx printf.cmx gc.cmi -genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi genlex.cmi -genlex.cmx : string.cmx stream.cmx list.cmx hashtbl.cmx char.cmx genlex.cmi +genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi bytes.cmi \ + genlex.cmi +genlex.cmx : string.cmx stream.cmx list.cmx hashtbl.cmx char.cmx bytes.cmx \ + genlex.cmi hashtbl.cmo : sys.cmi string.cmi random.cmi obj.cmi lazy.cmi array.cmi \ hashtbl.cmi hashtbl.cmx : sys.cmx string.cmx random.cmx obj.cmx lazy.cmx array.cmx \ @@ -92,16 +109,16 @@ int64.cmo : pervasives.cmi int64.cmi int64.cmx : pervasives.cmx int64.cmi lazy.cmo : obj.cmi camlinternalLazy.cmi lazy.cmi lazy.cmx : obj.cmx camlinternalLazy.cmx lazy.cmi -lexing.cmo : sys.cmi string.cmi array.cmi lexing.cmi -lexing.cmx : sys.cmx string.cmx array.cmx lexing.cmi +lexing.cmo : sys.cmi string.cmi bytes.cmi array.cmi lexing.cmi +lexing.cmx : sys.cmx string.cmx bytes.cmx array.cmx lexing.cmi list.cmo : list.cmi list.cmx : list.cmi listLabels.cmo : list.cmi listLabels.cmi listLabels.cmx : list.cmx listLabels.cmi map.cmo : map.cmi map.cmx : map.cmi -marshal.cmo : string.cmi marshal.cmi -marshal.cmx : string.cmx marshal.cmi +marshal.cmo : bytes.cmi marshal.cmi +marshal.cmx : bytes.cmx marshal.cmi moreLabels.cmo : set.cmi map.cmi hashtbl.cmi moreLabels.cmi moreLabels.cmx : set.cmx map.cmx hashtbl.cmx moreLabels.cmi nativeint.cmo : sys.cmi pervasives.cmi nativeint.cmi @@ -112,13 +129,15 @@ oo.cmo : camlinternalOO.cmi oo.cmi oo.cmx : camlinternalOO.cmx oo.cmi parsing.cmo : obj.cmi lexing.cmi array.cmi parsing.cmi parsing.cmx : obj.cmx lexing.cmx array.cmx parsing.cmi -pervasives.cmo : pervasives.cmi -pervasives.cmx : pervasives.cmi -printexc.cmo : printf.cmi obj.cmi buffer.cmi array.cmi printexc.cmi -printexc.cmx : printf.cmx obj.cmx buffer.cmx array.cmx printexc.cmi -printf.cmo : string.cmi obj.cmi list.cmi char.cmi buffer.cmi array.cmi \ +pervasives.cmo : camlinternalFormatBasics.cmi pervasives.cmi +pervasives.cmx : camlinternalFormatBasics.cmx pervasives.cmi +printexc.cmo : printf.cmi pervasives.cmi obj.cmi buffer.cmi array.cmi \ + printexc.cmi +printexc.cmx : printf.cmx pervasives.cmx obj.cmx buffer.cmx array.cmx \ + printexc.cmi +printf.cmo : camlinternalFormatBasics.cmi camlinternalFormat.cmi buffer.cmi \ printf.cmi -printf.cmx : string.cmx obj.cmx list.cmx char.cmx buffer.cmx array.cmx \ +printf.cmx : camlinternalFormatBasics.cmx camlinternalFormat.cmx buffer.cmx \ printf.cmi queue.cmo : obj.cmi queue.cmi queue.cmx : obj.cmx queue.cmi @@ -126,26 +145,28 @@ random.cmo : string.cmi pervasives.cmi nativeint.cmi int64.cmi int32.cmi \ digest.cmi char.cmi array.cmi random.cmi random.cmx : string.cmx pervasives.cmx nativeint.cmx int64.cmx int32.cmx \ digest.cmx char.cmx array.cmx random.cmi -scanf.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \ - hashtbl.cmi buffer.cmi array.cmi scanf.cmi -scanf.cmx : string.cmx printf.cmx pervasives.cmx obj.cmx list.cmx \ - hashtbl.cmx buffer.cmx array.cmx scanf.cmi +scanf.cmo : string.cmi printf.cmi pervasives.cmi list.cmi \ + camlinternalFormatBasics.cmi camlinternalFormat.cmi bytes.cmi buffer.cmi \ + scanf.cmi +scanf.cmx : string.cmx printf.cmx pervasives.cmx list.cmx \ + camlinternalFormatBasics.cmx camlinternalFormat.cmx bytes.cmx buffer.cmx \ + scanf.cmi set.cmo : list.cmi set.cmi set.cmx : list.cmx set.cmi sort.cmo : array.cmi sort.cmi sort.cmx : array.cmx sort.cmi stack.cmo : list.cmi stack.cmi stack.cmx : list.cmx stack.cmi -stdLabels.cmo : stringLabels.cmi listLabels.cmi arrayLabels.cmi \ - stdLabels.cmi -stdLabels.cmx : stringLabels.cmx listLabels.cmx arrayLabels.cmx \ - stdLabels.cmi +stdLabels.cmo : stringLabels.cmi listLabels.cmi bytesLabels.cmi \ + arrayLabels.cmi stdLabels.cmi +stdLabels.cmx : stringLabels.cmx listLabels.cmx bytesLabels.cmx \ + arrayLabels.cmx stdLabels.cmi std_exit.cmo : std_exit.cmx : -stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi stream.cmi -stream.cmx : string.cmx obj.cmx list.cmx lazy.cmx stream.cmi -string.cmo : pervasives.cmi list.cmi char.cmi string.cmi -string.cmx : pervasives.cmx list.cmx char.cmx string.cmi +stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi bytes.cmi stream.cmi +stream.cmx : string.cmx obj.cmx list.cmx lazy.cmx bytes.cmx stream.cmi +string.cmo : pervasives.cmi list.cmi bytes.cmi string.cmi +string.cmx : pervasives.cmx list.cmx bytes.cmx string.cmi stringLabels.cmo : string.cmi stringLabels.cmi stringLabels.cmx : string.cmx stringLabels.cmi sys.cmo : sys.cmi @@ -160,10 +181,20 @@ array.cmo : array.cmi array.p.cmx : array.cmi arrayLabels.cmo : array.cmi arrayLabels.cmi arrayLabels.p.cmx : array.p.cmx arrayLabels.cmi -buffer.cmo : sys.cmi string.cmi buffer.cmi -buffer.p.cmx : sys.p.cmx string.p.cmx buffer.cmi +buffer.cmo : sys.cmi string.cmi bytes.cmi buffer.cmi +buffer.p.cmx : sys.p.cmx string.p.cmx bytes.p.cmx buffer.cmi +bytes.cmo : pervasives.cmi list.cmi char.cmi bytes.cmi +bytes.p.cmx : pervasives.p.cmx list.p.cmx char.p.cmx bytes.cmi +bytesLabels.cmo : bytes.cmi bytesLabels.cmi +bytesLabels.p.cmx : bytes.p.cmx bytesLabels.cmi callback.cmo : obj.cmi callback.cmi callback.p.cmx : obj.p.cmx callback.cmi +camlinternalFormat.cmo : sys.cmi string.cmi char.cmi \ + camlinternalFormatBasics.cmi bytes.cmi buffer.cmi camlinternalFormat.cmi +camlinternalFormat.p.cmx : sys.p.cmx string.p.cmx char.p.cmx \ + camlinternalFormatBasics.p.cmx bytes.p.cmx buffer.p.cmx camlinternalFormat.cmi +camlinternalFormatBasics.cmo : camlinternalFormatBasics.cmi +camlinternalFormatBasics.p.cmx : camlinternalFormatBasics.cmi camlinternalLazy.cmo : obj.cmi camlinternalLazy.cmi camlinternalLazy.p.cmx : obj.p.cmx camlinternalLazy.cmi camlinternalMod.cmo : obj.cmi camlinternalOO.cmi array.cmi \ @@ -178,20 +209,22 @@ char.cmo : char.cmi char.p.cmx : char.cmi complex.cmo : complex.cmi complex.p.cmx : complex.cmi -digest.cmo : string.cmi char.cmi digest.cmi -digest.p.cmx : string.p.cmx char.p.cmx digest.cmi +digest.cmo : string.cmi char.cmi bytes.cmi digest.cmi +digest.p.cmx : string.p.cmx char.p.cmx bytes.p.cmx digest.cmi filename.cmo : sys.cmi string.cmi random.cmi printf.cmi lazy.cmi buffer.cmi \ filename.cmi filename.p.cmx : sys.p.cmx string.p.cmx random.p.cmx printf.p.cmx lazy.p.cmx buffer.p.cmx \ filename.cmi -format.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \ - buffer.cmi format.cmi -format.p.cmx : string.p.cmx printf.p.cmx pervasives.p.cmx obj.p.cmx list.p.cmx \ - buffer.p.cmx format.cmi +format.cmo : string.cmi pervasives.cmi camlinternalFormatBasics.cmi \ + camlinternalFormat.cmi buffer.cmi format.cmi +format.p.cmx : string.p.cmx pervasives.p.cmx camlinternalFormatBasics.p.cmx \ + camlinternalFormat.p.cmx buffer.p.cmx format.cmi gc.cmo : sys.cmi printf.cmi gc.cmi gc.p.cmx : sys.p.cmx printf.p.cmx gc.cmi -genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi genlex.cmi -genlex.p.cmx : string.p.cmx stream.p.cmx list.p.cmx hashtbl.p.cmx char.p.cmx genlex.cmi +genlex.cmo : string.cmi stream.cmi list.cmi hashtbl.cmi char.cmi bytes.cmi \ + genlex.cmi +genlex.p.cmx : string.p.cmx stream.p.cmx list.p.cmx hashtbl.p.cmx char.p.cmx bytes.p.cmx \ + genlex.cmi hashtbl.cmo : sys.cmi string.cmi random.cmi obj.cmi lazy.cmi array.cmi \ hashtbl.cmi hashtbl.p.cmx : sys.p.cmx string.p.cmx random.p.cmx obj.p.cmx lazy.p.cmx array.p.cmx \ @@ -202,16 +235,16 @@ int64.cmo : pervasives.cmi int64.cmi int64.p.cmx : pervasives.p.cmx int64.cmi lazy.cmo : obj.cmi camlinternalLazy.cmi lazy.cmi lazy.p.cmx : obj.p.cmx camlinternalLazy.p.cmx lazy.cmi -lexing.cmo : sys.cmi string.cmi array.cmi lexing.cmi -lexing.p.cmx : sys.p.cmx string.p.cmx array.p.cmx lexing.cmi +lexing.cmo : sys.cmi string.cmi bytes.cmi array.cmi lexing.cmi +lexing.p.cmx : sys.p.cmx string.p.cmx bytes.p.cmx array.p.cmx lexing.cmi list.cmo : list.cmi list.p.cmx : list.cmi listLabels.cmo : list.cmi listLabels.cmi listLabels.p.cmx : list.p.cmx listLabels.cmi map.cmo : map.cmi map.p.cmx : map.cmi -marshal.cmo : string.cmi marshal.cmi -marshal.p.cmx : string.p.cmx marshal.cmi +marshal.cmo : bytes.cmi marshal.cmi +marshal.p.cmx : bytes.p.cmx marshal.cmi moreLabels.cmo : set.cmi map.cmi hashtbl.cmi moreLabels.cmi moreLabels.p.cmx : set.p.cmx map.p.cmx hashtbl.p.cmx moreLabels.cmi nativeint.cmo : sys.cmi pervasives.cmi nativeint.cmi @@ -222,13 +255,15 @@ oo.cmo : camlinternalOO.cmi oo.cmi oo.p.cmx : camlinternalOO.p.cmx oo.cmi parsing.cmo : obj.cmi lexing.cmi array.cmi parsing.cmi parsing.p.cmx : obj.p.cmx lexing.p.cmx array.p.cmx parsing.cmi -pervasives.cmo : pervasives.cmi -pervasives.p.cmx : pervasives.cmi -printexc.cmo : printf.cmi obj.cmi buffer.cmi array.cmi printexc.cmi -printexc.p.cmx : printf.p.cmx obj.p.cmx buffer.p.cmx array.p.cmx printexc.cmi -printf.cmo : string.cmi obj.cmi list.cmi char.cmi buffer.cmi array.cmi \ +pervasives.cmo : camlinternalFormatBasics.cmi pervasives.cmi +pervasives.p.cmx : camlinternalFormatBasics.p.cmx pervasives.cmi +printexc.cmo : printf.cmi pervasives.cmi obj.cmi buffer.cmi array.cmi \ + printexc.cmi +printexc.p.cmx : printf.p.cmx pervasives.p.cmx obj.p.cmx buffer.p.cmx array.p.cmx \ + printexc.cmi +printf.cmo : camlinternalFormatBasics.cmi camlinternalFormat.cmi buffer.cmi \ printf.cmi -printf.p.cmx : string.p.cmx obj.p.cmx list.p.cmx char.p.cmx buffer.p.cmx array.p.cmx \ +printf.p.cmx : camlinternalFormatBasics.p.cmx camlinternalFormat.p.cmx buffer.p.cmx \ printf.cmi queue.cmo : obj.cmi queue.cmi queue.p.cmx : obj.p.cmx queue.cmi @@ -236,26 +271,28 @@ random.cmo : string.cmi pervasives.cmi nativeint.cmi int64.cmi int32.cmi \ digest.cmi char.cmi array.cmi random.cmi random.p.cmx : string.p.cmx pervasives.p.cmx nativeint.p.cmx int64.p.cmx int32.p.cmx \ digest.p.cmx char.p.cmx array.p.cmx random.cmi -scanf.cmo : string.cmi printf.cmi pervasives.cmi obj.cmi list.cmi \ - hashtbl.cmi buffer.cmi array.cmi scanf.cmi -scanf.p.cmx : string.p.cmx printf.p.cmx pervasives.p.cmx obj.p.cmx list.p.cmx \ - hashtbl.p.cmx buffer.p.cmx array.p.cmx scanf.cmi +scanf.cmo : string.cmi printf.cmi pervasives.cmi list.cmi \ + camlinternalFormatBasics.cmi camlinternalFormat.cmi bytes.cmi buffer.cmi \ + scanf.cmi +scanf.p.cmx : string.p.cmx printf.p.cmx pervasives.p.cmx list.p.cmx \ + camlinternalFormatBasics.p.cmx camlinternalFormat.p.cmx bytes.p.cmx buffer.p.cmx \ + scanf.cmi set.cmo : list.cmi set.cmi set.p.cmx : list.p.cmx set.cmi sort.cmo : array.cmi sort.cmi sort.p.cmx : array.p.cmx sort.cmi stack.cmo : list.cmi stack.cmi stack.p.cmx : list.p.cmx stack.cmi -stdLabels.cmo : stringLabels.cmi listLabels.cmi arrayLabels.cmi \ - stdLabels.cmi -stdLabels.p.cmx : stringLabels.p.cmx listLabels.p.cmx arrayLabels.p.cmx \ - stdLabels.cmi +stdLabels.cmo : stringLabels.cmi listLabels.cmi bytesLabels.cmi \ + arrayLabels.cmi stdLabels.cmi +stdLabels.p.cmx : stringLabels.p.cmx listLabels.p.cmx bytesLabels.p.cmx \ + arrayLabels.p.cmx stdLabels.cmi std_exit.cmo : std_exit.p.cmx : -stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi stream.cmi -stream.p.cmx : string.p.cmx obj.p.cmx list.p.cmx lazy.p.cmx stream.cmi -string.cmo : pervasives.cmi list.cmi char.cmi string.cmi -string.p.cmx : pervasives.p.cmx list.p.cmx char.p.cmx string.cmi +stream.cmo : string.cmi obj.cmi list.cmi lazy.cmi bytes.cmi stream.cmi +stream.p.cmx : string.p.cmx obj.p.cmx list.p.cmx lazy.p.cmx bytes.p.cmx stream.cmi +string.cmo : pervasives.cmi list.cmi bytes.cmi string.cmi +string.p.cmx : pervasives.p.cmx list.p.cmx bytes.p.cmx string.cmi stringLabels.cmo : string.cmi stringLabels.cmi stringLabels.p.cmx : string.p.cmx stringLabels.cmi sys.cmo : sys.cmi diff --git a/stdlib/Compflags b/stdlib/Compflags index d0938af89..f393c4ec0 100755 --- a/stdlib/Compflags +++ b/stdlib/Compflags @@ -18,8 +18,11 @@ case $1 in camlinternalOO.cmx|camlinternalOO.p.cmx) echo ' -inline 0';; buffer.cmx|buffer.p.cmx) echo ' -inline 3';; # make sure add_char is inlined (PR#5872) - buffer.cm[io]|printf.cm[io]|format.cm[io]|scanf.cm[io]) echo ' -w A';; + buffer.cm[io]) echo ' -w A';; + camlinternalFormat.cm[io]) echo ' -w Ae';; + camlinternalFormatBasics*.cm[iox]) echo ' -nopervasives';; + printf.cm[io]|format.cm[io]|scanf.cm[io]) echo ' -w Ae';; scanf.cmx|scanf.p.cmx) echo ' -inline 9';; - *Labels.cm[ox]|*Labels.p.cmx) echo ' -nolabels -trans-mod';; + *Labels.cm[ox]|*Labels.p.cmx) echo ' -nolabels -no-alias-deps';; *) echo ' ';; esac diff --git a/stdlib/Makefile b/stdlib/Makefile index 80be16e0b..37f9a5f0b 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -25,19 +25,22 @@ allopt-prof: stdlib.p.cmxa std_exit.p.cmx installopt: installopt-default installopt-$(PROFILING) installopt-default: - cp stdlib.cmxa stdlib.a std_exit.o *.cmx $(LIBDIR) - cd $(LIBDIR); $(RANLIB) stdlib.a + cp stdlib.cmxa stdlib.a std_exit.o *.cmx $(INSTALL_LIBDIR) + cd $(INSTALL_LIBDIR); $(RANLIB) stdlib.a installopt-noprof: - rm -f $(LIBDIR)/stdlib.p.cmxa; ln -s stdlib.cmxa $(LIBDIR)/stdlib.p.cmxa - rm -f $(LIBDIR)/stdlib.p.a; ln -s stdlib.a $(LIBDIR)/stdlib.p.a - rm -f $(LIBDIR)/std_exit.p.cmx; \ - ln -s std_exit.cmx $(LIBDIR)/std_exit.p.cmx - rm -f $(LIBDIR)/std_exit.p.o; ln -s std_exit.o $(LIBDIR)/std_exit.p.o + rm -f $(INSTALL_LIBDIR)/stdlib.p.cmxa; \ + ln -s stdlib.cmxa $(INSTALL_LIBDIR)/stdlib.p.cmxa + rm -f $(INSTALL_LIBDIR)/stdlib.p.a; \ + ln -s stdlib.a $(INSTALL_LIBDIR)/stdlib.p.a + rm -f $(INSTALL_LIBDIR)/std_exit.p.cmx; \ + ln -s std_exit.cmx $(INSTALL_LIBDIR)/std_exit.p.cmx + rm -f $(INSTALL_LIBDIR)/std_exit.p.o; \ + ln -s std_exit.o $(INSTALL_LIBDIR)/std_exit.p.o installopt-prof: - cp stdlib.p.cmxa stdlib.p.a std_exit.p.cmx std_exit.p.o $(LIBDIR) - cd $(LIBDIR); $(RANLIB) stdlib.p.a + cp stdlib.p.cmxa stdlib.p.a std_exit.p.cmx std_exit.p.o $(INSTALL_LIBDIR) + cd $(INSTALL_LIBDIR); $(RANLIB) stdlib.p.a stdlib.p.cmxa: $(OBJS:.cmo=.p.cmx) $(CAMLOPT) -a -o stdlib.p.cmxa $(OBJS:.cmo=.p.cmx) diff --git a/stdlib/Makefile.nt b/stdlib/Makefile.nt index b85622b90..590701bf9 100644 --- a/stdlib/Makefile.nt +++ b/stdlib/Makefile.nt @@ -16,7 +16,7 @@ include Makefile.shared allopt: stdlib.cmxa std_exit.cmx installopt: - cp stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx $(LIBDIR) + cp stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx $(INSTALL_LIBDIR) camlheader camlheader_ur: headernt.c ../config/Makefile $(BYTECC) $(BYTECCCOMPOPTS) -c -I../byterun \ diff --git a/stdlib/Makefile.shared b/stdlib/Makefile.shared index 80c40d600..54de337cb 100755 --- a/stdlib/Makefile.shared +++ b/stdlib/Makefile.shared @@ -15,38 +15,41 @@ include ../config/Makefile RUNTIME=../boot/ocamlrun COMPILER=../ocamlc CAMLC=$(RUNTIME) $(COMPILER) -COMPFLAGS=-strict-sequence -w +33..39 -g -warn-error A -bin-annot -nostdlib +COMPFLAGS=-strict-sequence -w +33..39 -g -warn-error A -bin-annot -nostdlib \ + -safe-string OPTCOMPILER=../ocamlopt CAMLOPT=$(RUNTIME) $(OPTCOMPILER) -OPTCOMPFLAGS=-warn-error A -nostdlib -g -bin-annot CAMLDEP=../boot/ocamlrun ../tools/ocamldep -OBJS=pervasives.cmo $(OTHERS) -OTHERS=array.cmo list.cmo char.cmo string.cmo sys.cmo \ +OBJS=camlinternalFormatBasics.cmo pervasives.cmo $(OTHERS) +OTHERS=array.cmo list.cmo char.cmo bytes.cmo string.cmo sys.cmo \ sort.cmo marshal.cmo obj.cmo \ int32.cmo int64.cmo nativeint.cmo \ lexing.cmo parsing.cmo \ set.cmo map.cmo stack.cmo queue.cmo \ camlinternalLazy.cmo lazy.cmo stream.cmo \ - buffer.cmo printf.cmo \ + buffer.cmo camlinternalFormat.cmo printf.cmo \ arg.cmo printexc.cmo gc.cmo \ digest.cmo random.cmo hashtbl.cmo format.cmo scanf.cmo callback.cmo \ camlinternalOO.cmo oo.cmo camlinternalMod.cmo \ genlex.cmo weak.cmo \ filename.cmo complex.cmo \ - arrayLabels.cmo listLabels.cmo stringLabels.cmo moreLabels.cmo stdLabels.cmo + arrayLabels.cmo listLabels.cmo bytesLabels.cmo \ + stringLabels.cmo moreLabels.cmo stdLabels.cmo all: stdlib.cma std_exit.cmo camlheader camlheader_ur +INSTALL_LIBDIR=$(DESTDIR)$(LIBDIR) + install: install-$(RUNTIMED) cp stdlib.cma std_exit.cmo *.cmi *.mli *.ml camlheader camlheader_ur \ - $(LIBDIR) + $(INSTALL_LIBDIR) install-noruntimed: .PHONY: install-noruntimed install-runtimed: camlheaderd - cp camlheaderd $(LIBDIR) + cp camlheaderd $(INSTALL_LIBDIR) .PHONY: install-runtimed stdlib.cma: $(OBJS) @@ -73,10 +76,10 @@ clean:: $(CAMLC) $(COMPFLAGS) `./Compflags $@` -c $< .ml.cmx: - $(CAMLOPT) $(OPTCOMPFLAGS) `./Compflags $@` -c $< + $(CAMLOPT) $(COMPFLAGS) `./Compflags $@` -c $< .ml.p.cmx: - $(CAMLOPT) $(OPTCOMPFLAGS) `./Compflags $@` -p -c -o $*.p.cmx $< + $(CAMLOPT) $(COMPFLAGS) `./Compflags $@` -p -c -o $*.p.cmx $< # Dependencies on the compiler $(OBJS) std_exit.cmo: $(COMPILER) @@ -85,7 +88,7 @@ $(OBJS:.cmo=.cmx) std_exit.cmx: $(OPTCOMPILER) $(OBJS:.cmo=.p.cmx) std_exit.p.cmx: $(OPTCOMPILER) # Dependencies on Pervasives (not tracked by ocamldep) -$(OBJS) std_exit.cmo: pervasives.cmi +$(OTHERS) std_exit.cmo: pervasives.cmi $(OTHERS:.cmo=.cmi) std_exit.cmi: pervasives.cmi $(OBJS:.cmo=.cmx) std_exit.cmx: pervasives.cmi $(OBJS:.cmo=.p.cmx) std_exit.p.cmx: pervasives.cmi diff --git a/stdlib/StdlibModules b/stdlib/StdlibModules index c5c8896ed..abdfcb362 100644 --- a/stdlib/StdlibModules +++ b/stdlib/StdlibModules @@ -21,7 +21,11 @@ STDLIB_MODULES=\ array \ arrayLabels \ buffer \ + bytes \ + bytesLabels \ callback \ + camlinternalFormat \ + camlinternalFormatBasics \ camlinternalLazy \ camlinternalMod \ camlinternalOO \ diff --git a/stdlib/arg.ml b/stdlib/arg.ml index 8b64236a7..0f6480b82 100644 --- a/stdlib/arg.ml +++ b/stdlib/arg.ml @@ -112,11 +112,11 @@ let parse_argv_dynamic ?(current=current) argv speclist anonfun errmsg = | Unknown "-help" -> () | Unknown "--help" -> () | Unknown s -> - bprintf b "%s: unknown option `%s'.\n" progname s + bprintf b "%s: unknown option '%s'.\n" progname s | Missing s -> - bprintf b "%s: option `%s' needs an argument.\n" progname s + bprintf b "%s: option '%s' needs an argument.\n" progname s | Wrong (opt, arg, expected) -> - bprintf b "%s: wrong argument `%s'; option `%s' expects %s.\n" + bprintf b "%s: wrong argument '%s'; option '%s' expects %s.\n" progname arg opt expected | Message s -> bprintf b "%s: %s.\n" progname s @@ -129,7 +129,7 @@ let parse_argv_dynamic ?(current=current) argv speclist anonfun errmsg = incr current; while !current < l do let s = argv.(!current) in - if String.length s >= 1 && String.get s 0 = '-' then begin + if String.length s >= 1 && s.[0] = '-' then begin let action = try assoc3 s !speclist with Not_found -> stop (Unknown s) diff --git a/stdlib/arg.mli b/stdlib/arg.mli index e6e07316d..22eda40b7 100644 --- a/stdlib/arg.mli +++ b/stdlib/arg.mli @@ -94,7 +94,7 @@ val parse : *) val parse_dynamic : - (key * spec * doc) list ref -> anon_fun -> string -> unit + (key * spec * doc) list ref -> anon_fun -> usage_msg -> unit (** Same as {!Arg.parse}, except that the [speclist] argument is a reference and may be updated during the parsing. A typical use for this feature is to parse command lines of the form: diff --git a/stdlib/array.mli b/stdlib/array.mli index 7c0049e28..e9a64528f 100644 --- a/stdlib/array.mli +++ b/stdlib/array.mli @@ -47,6 +47,7 @@ external make : int -> 'a -> 'a array = "caml_make_vect" size is only [Sys.max_array_length / 2].*) external create : int -> 'a -> 'a array = "caml_make_vect" + [@@ocaml.deprecated] (** @deprecated [Array.create] is an alias for {!Array.make}. *) val init : int -> (int -> 'a) -> 'a array @@ -73,6 +74,7 @@ val make_matrix : int -> int -> 'a -> 'a array array size is only [Sys.max_array_length / 2]. *) val create_matrix : int -> int -> 'a -> 'a array array + [@@ocaml.deprecated] (** @deprecated [Array.create_matrix] is an alias for {!Array.make_matrix}. *) val append : 'a array -> 'a array -> 'a array diff --git a/stdlib/arrayLabels.mli b/stdlib/arrayLabels.mli index 03b6224ae..cf8b650e5 100644 --- a/stdlib/arrayLabels.mli +++ b/stdlib/arrayLabels.mli @@ -47,7 +47,8 @@ external make : int -> 'a -> 'a array = "caml_make_vect" size is only [Sys.max_array_length / 2].*) external create : int -> 'a -> 'a array = "caml_make_vect" -(** @deprecated [Array.create] is an alias for {!ArrayLabels.make}. *) + [@@ocaml.deprecated] +(** @deprecated [ArrayLabels.create] is an alias for {!ArrayLabels.make}. *) val init : int -> f:(int -> 'a) -> 'a array (** [Array.init n f] returns a fresh array of length [n], @@ -73,7 +74,8 @@ val make_matrix : dimx:int -> dimy:int -> 'a -> 'a array array size is only [Sys.max_array_length / 2]. *) val create_matrix : dimx:int -> dimy:int -> 'a -> 'a array array -(** @deprecated [Array.create_matrix] is an alias for + [@@ocaml.deprecated] +(** @deprecated [ArrayLabels.create_matrix] is an alias for {!ArrayLabels.make_matrix}. *) val append : 'a array -> 'a array -> 'a array diff --git a/stdlib/buffer.ml b/stdlib/buffer.ml index 78a9e2611..986fe6f33 100644 --- a/stdlib/buffer.ml +++ b/stdlib/buffer.ml @@ -14,41 +14,38 @@ (* Extensible buffers *) type t = - {mutable buffer : string; + {mutable buffer : bytes; mutable position : int; mutable length : int; - initial_buffer : string} + initial_buffer : bytes} let create n = let n = if n < 1 then 1 else n in let n = if n > Sys.max_string_length then Sys.max_string_length else n in - let s = String.create n in + let s = Bytes.create n in {buffer = s; position = 0; length = n; initial_buffer = s} -let contents b = String.sub b.buffer 0 b.position +let contents b = Bytes.sub_string b.buffer 0 b.position +let to_bytes b = Bytes.sub b.buffer 0 b.position let sub b ofs len = if ofs < 0 || len < 0 || ofs > b.position - len then invalid_arg "Buffer.sub" - else begin - let r = String.create len in - String.unsafe_blit b.buffer ofs r 0 len; - r - end + else Bytes.sub_string b.buffer ofs len ;; let blit src srcoff dst dstoff len = if len < 0 || srcoff < 0 || srcoff > src.position - len - || dstoff < 0 || dstoff > (String.length dst) - len + || dstoff < 0 || dstoff > (Bytes.length dst) - len then invalid_arg "Buffer.blit" else - String.blit src.buffer srcoff dst dstoff len + Bytes.unsafe_blit src.buffer srcoff dst dstoff len ;; let nth b ofs = if ofs < 0 || ofs >= b.position then invalid_arg "Buffer.nth" - else String.unsafe_get b.buffer ofs + else Bytes.unsafe_get b.buffer ofs ;; let length b = b.position @@ -57,7 +54,7 @@ let clear b = b.position <- 0 let reset b = b.position <- 0; b.buffer <- b.initial_buffer; - b.length <- String.length b.buffer + b.length <- Bytes.length b.buffer let resize b more = let len = b.length in @@ -68,34 +65,41 @@ let resize b more = then new_len := Sys.max_string_length else failwith "Buffer.add: cannot grow buffer" end; - let new_buffer = String.create !new_len in - String.blit b.buffer 0 new_buffer 0 b.position; + let new_buffer = Bytes.create !new_len in + (* PR#6148: let's keep using [blit] rather than [unsafe_blit] in + this tricky function that is slow anyway. *) + Bytes.blit b.buffer 0 new_buffer 0 b.position; b.buffer <- new_buffer; b.length <- !new_len let add_char b c = let pos = b.position in if pos >= b.length then resize b 1; - String.unsafe_set b.buffer pos c; + Bytes.unsafe_set b.buffer pos c; b.position <- pos + 1 let add_substring b s offset len = - if offset < 0 || len < 0 || offset > String.length s - len - then invalid_arg "Buffer.add_substring"; + if offset < 0 || len < 0 || offset + len > String.length s + then invalid_arg "Buffer.add_substring/add_subbytes"; let new_position = b.position + len in if new_position > b.length then resize b len; - String.unsafe_blit s offset b.buffer b.position len; + Bytes.blit_string s offset b.buffer b.position len; b.position <- new_position +let add_subbytes b s offset len = + add_substring b (Bytes.unsafe_to_string s) offset len + let add_string b s = let len = String.length s in let new_position = b.position + len in if new_position > b.length then resize b len; - String.unsafe_blit s 0 b.buffer b.position len; + Bytes.blit_string s 0 b.buffer b.position len; b.position <- new_position +let add_bytes b s = add_string b (Bytes.unsafe_to_string s) + let add_buffer b bs = - add_substring b bs.buffer 0 bs.position + add_subbytes b bs.buffer 0 bs.position let add_channel b ic len = if len < 0 || len > Sys.max_string_length then (* PR#5004 *) diff --git a/stdlib/buffer.mli b/stdlib/buffer.mli index c50c98792..e7ce8b999 100644 --- a/stdlib/buffer.mli +++ b/stdlib/buffer.mli @@ -11,9 +11,9 @@ (* *) (***********************************************************************) -(** Extensible string buffers. +(** Extensible buffers. - This module implements string buffers that automatically expand + This module implements buffers that automatically expand as necessary. It provides accumulative concatenation of strings in quasi-linear time (instead of quadratic time when strings are concatenated pairwise). @@ -24,8 +24,8 @@ type t val create : int -> t (** [create n] returns a fresh buffer, initially empty. - The [n] parameter is the initial size of the internal string - that holds the buffer contents. That string is automatically + The [n] parameter is the initial size of the internal byte sequence + that holds the buffer contents. That byte sequence is automatically reallocated when more than [n] characters are stored in the buffer, but shrinks back to [n] characters when [reset] is called. For best performance, [n] should be of the same order of magnitude @@ -40,26 +40,30 @@ val contents : t -> string (** Return a copy of the current contents of the buffer. The buffer itself is unchanged. *) +val to_bytes : t -> bytes +(** Return a copy of the current contents of the buffer. + The buffer itself is unchanged. *) + val sub : t -> int -> int -> string -(** [Buffer.sub b off len] returns (a copy of) the substring of the -current contents of the buffer [b] starting at offset [off] of length -[len] bytes. May raise [Invalid_argument] if out of bounds request. The -buffer itself is unaffected. *) +(** [Buffer.sub b off len] returns (a copy of) the bytes from the + current contents of the buffer [b] starting at offset [off] of + length [len] bytes. May raise [Invalid_argument] if out of bounds + request. The buffer itself is unaffected. *) -val blit : t -> int -> string -> int -> int -> unit +val blit : t -> int -> bytes -> int -> int -> unit (** [Buffer.blit src srcoff dst dstoff len] copies [len] characters from the current contents of the buffer [src], starting at offset [srcoff] - to string [dst], starting at character [dstoff]. + to [dst], starting at character [dstoff]. Raise [Invalid_argument] if [srcoff] and [len] do not designate a valid - substring of [src], or if [dstoff] and [len] do not designate a valid - substring of [dst]. + range of [src], or if [dstoff] and [len] do not designate a valid + range of [dst]. @since 3.11.2 *) val nth : t -> int -> char (** get the n-th character of the buffer. Raise [Invalid_argument] if -index out of bounds *) + index out of bounds *) val length : t -> int (** Return the number of characters currently contained in the buffer. *) @@ -68,8 +72,8 @@ val clear : t -> unit (** Empty the buffer. *) val reset : t -> unit -(** Empty the buffer and deallocate the internal string holding the - buffer contents, replacing it with the initial internal string +(** Empty the buffer and deallocate the internal byte sequence holding the + buffer contents, replacing it with the initial internal byte sequence of length [n] that was allocated by {!Buffer.create} [n]. For long-lived buffers that may have grown a lot, [reset] allows faster reclamation of the space used by the buffer. *) @@ -80,10 +84,17 @@ val add_char : t -> char -> unit val add_string : t -> string -> unit (** [add_string b s] appends the string [s] at the end of the buffer [b]. *) +val add_bytes : t -> bytes -> unit +(** [add_string b s] appends the string [s] at the end of the buffer [b]. *) + val add_substring : t -> string -> int -> int -> unit (** [add_substring b s ofs len] takes [len] characters from offset [ofs] in string [s] and appends them at the end of the buffer [b]. *) +val add_subbytes : t -> bytes -> int -> int -> unit +(** [add_substring b s ofs len] takes [len] characters from offset + [ofs] in byte sequence [s] and appends them at the end of the buffer [b]. *) + val add_substitute : t -> (string -> string) -> string -> unit (** [add_substitute b f s] appends the string pattern [s] at the end of the buffer [b] with substitution. diff --git a/stdlib/bytes.ml b/stdlib/bytes.ml new file mode 100644 index 000000000..ece7c1ea5 --- /dev/null +++ b/stdlib/bytes.ml @@ -0,0 +1,253 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(* Byte sequence operations *) + +external length : bytes -> int = "%string_length" +external string_length : string -> int = "%string_length" +external get : bytes -> int -> char = "%string_safe_get" +external set : bytes -> int -> char -> unit = "%string_safe_set" +external create : int -> bytes = "caml_create_string" +external unsafe_get : bytes -> int -> char = "%string_unsafe_get" +external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" +external unsafe_fill : bytes -> int -> int -> char -> unit + = "caml_fill_string" "noalloc" +external unsafe_to_string : bytes -> string = "%identity" +external unsafe_of_string : string -> bytes = "%identity" + +external unsafe_blit : bytes -> int -> bytes -> int -> int -> unit + = "caml_blit_string" "noalloc" +external unsafe_blit_string : string -> int -> bytes -> int -> int -> unit + = "caml_blit_string" "noalloc" + +let make n c = + let s = create n in + unsafe_fill s 0 n c; + s + +let init n f = + let s = create n in + for i = 0 to n - 1 do + unsafe_set s i (f i) + done; + s + +let empty = create 0;; + +let copy s = + let len = length s in + let r = create len in + unsafe_blit s 0 r 0 len; + r + +let to_string b = unsafe_to_string (copy b) +let of_string s = copy (unsafe_of_string s) + +let sub s ofs len = + if ofs < 0 || len < 0 || ofs > length s - len + then invalid_arg "Bytes.sub" + else begin + let r = create len in + unsafe_blit s ofs r 0 len; + r + end + +let sub_string b ofs len = unsafe_to_string (sub b ofs len) + +let extend s left right = + let len = length s + left + right in + let r = create len in + let (srcoff, dstoff) = if left < 0 then -left, 0 else 0, left in + let cpylen = min (length s - srcoff) (len - dstoff) in + if cpylen > 0 then unsafe_blit s srcoff r dstoff cpylen; + r + +let fill s ofs len c = + if ofs < 0 || len < 0 || ofs > length s - len + then invalid_arg "Bytes.fill" + else unsafe_fill s ofs len c + +let blit s1 ofs1 s2 ofs2 len = + if len < 0 || ofs1 < 0 || ofs1 > length s1 - len + || ofs2 < 0 || ofs2 > length s2 - len + then invalid_arg "Bytes.blit" + else unsafe_blit s1 ofs1 s2 ofs2 len + +let blit_string s1 ofs1 s2 ofs2 len = + if len < 0 || ofs1 < 0 || ofs1 > string_length s1 - len + || ofs2 < 0 || ofs2 > length s2 - len + then invalid_arg "Bytes.blit_string" + else unsafe_blit_string s1 ofs1 s2 ofs2 len + +let iter f a = + for i = 0 to length a - 1 do f(unsafe_get a i) done + +let iteri f a = + for i = 0 to length a - 1 do f i (unsafe_get a i) done + +let concat sep l = + match l with + [] -> empty + | hd :: tl -> + let num = ref 0 and len = ref 0 in + List.iter (fun s -> incr num; len := !len + length s) l; + let r = create (!len + length sep * (!num - 1)) in + unsafe_blit hd 0 r 0 (length hd); + let pos = ref(length hd) in + List.iter + (fun s -> + unsafe_blit sep 0 r !pos (length sep); + pos := !pos + length sep; + unsafe_blit s 0 r !pos (length s); + pos := !pos + length s) + tl; + r + +let cat s1 s2 = + let l1 = length s1 in + let l2 = length s2 in + let r = create (l1 + l2) in + unsafe_blit s1 0 r 0 l1; + unsafe_blit s2 0 r l1 l2; + r +;; + +external is_printable: char -> bool = "caml_is_printable" +external char_code: char -> int = "%identity" +external char_chr: int -> char = "%identity" + +let is_space = function + | ' ' | '\012' | '\n' | '\r' | '\t' -> true + | _ -> false + +let trim s = + let len = length s in + let i = ref 0 in + while !i < len && is_space (unsafe_get s !i) do + incr i + done; + let j = ref (len - 1) in + while !j >= !i && is_space (unsafe_get s !j) do + decr j + done; + if !j >= !i then + sub s !i (!j - !i + 1) + else + empty + +let escaped s = + let n = ref 0 in + for i = 0 to length s - 1 do + n := !n + + (match unsafe_get s i with + | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2 + | c -> if is_printable c then 1 else 4) + done; + if !n = length s then copy s else begin + let s' = create !n in + n := 0; + for i = 0 to length s - 1 do + begin match unsafe_get s i with + | ('"' | '\\') as c -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c + | '\n' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n' + | '\t' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't' + | '\r' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'r' + | '\b' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'b' + | c -> + if is_printable c then + unsafe_set s' !n c + else begin + let a = char_code c in + unsafe_set s' !n '\\'; + incr n; + unsafe_set s' !n (char_chr (48 + a / 100)); + incr n; + unsafe_set s' !n (char_chr (48 + (a / 10) mod 10)); + incr n; + unsafe_set s' !n (char_chr (48 + a mod 10)) + end + end; + incr n + done; + s' + end + +let map f s = + let l = length s in + if l = 0 then s else begin + let r = create l in + for i = 0 to l - 1 do unsafe_set r i (f (unsafe_get s i)) done; + r + end + +let mapi f s = + let l = length s in + if l = 0 then s else begin + let r = create l in + for i = 0 to l - 1 do unsafe_set r i (f i (unsafe_get s i)) done; + r + end + +let uppercase s = map Char.uppercase s +let lowercase s = map Char.lowercase s + +let apply1 f s = + if length s = 0 then s else begin + let r = copy s in + unsafe_set r 0 (f(unsafe_get s 0)); + r + end + +let capitalize s = apply1 Char.uppercase s +let uncapitalize s = apply1 Char.lowercase s + +let rec index_rec s lim i c = + if i >= lim then raise Not_found else + if unsafe_get s i = c then i else index_rec s lim (i + 1) c;; + +let index s c = index_rec s (length s) 0 c;; + +let index_from s i c = + let l = length s in + if i < 0 || i > l then invalid_arg "Bytes.index_from" else + index_rec s l i c;; + +let rec rindex_rec s i c = + if i < 0 then raise Not_found else + if unsafe_get s i = c then i else rindex_rec s (i - 1) c;; + +let rindex s c = rindex_rec s (length s - 1) c;; + +let rindex_from s i c = + if i < -1 || i >= length s then invalid_arg "Bytes.rindex_from" else + rindex_rec s i c;; + +let contains_from s i c = + let l = length s in + if i < 0 || i > l then invalid_arg "Bytes.contains_from" else + try ignore (index_rec s l i c); true with Not_found -> false;; + +let contains s c = contains_from s 0 c;; + +let rcontains_from s i c = + if i < 0 || i >= length s then invalid_arg "Bytes.rcontains_from" else + try ignore (rindex_rec s i c); true with Not_found -> false;; + +type t = bytes + +let compare (x: t) (y: t) = Pervasives.compare x y diff --git a/stdlib/bytes.mli b/stdlib/bytes.mli new file mode 100644 index 000000000..82b28a28c --- /dev/null +++ b/stdlib/bytes.mli @@ -0,0 +1,398 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Byte sequence operations. + + A byte sequence is a mutable data structure that contains a + fixed-length sequence of bytes. Each byte can be indexed in + constant time for reading or writing. + + Given a byte sequence [s] of length [l], we can access each of the + [l] bytes of [s] via its index in the sequence. Indexes start at + [0], and we will call an index valid in [s] if it falls within the + range [[0...l-1]] (inclusive). A position is the point between two + bytes or at the beginning or end of the sequence. We call a + position valid in [s] if it falls within the range [[0...l]] + (inclusive). Note that the byte at index [n] is between positions + [n] and [n+1]. + + Two parameters [start] and [len] are said to designate a valid + range of [s] if [len >= 0] and [start] and [start+len] are valid + positions in [s]. + + Byte sequences can be modified in place, for instance via the [set] + and [blit] functions described below. See also strings (module + {!String}), which are almost the same data structure, but cannot be + modified in place. + + Bytes are represented by the OCaml type [char]. + + @since 4.02.0 + *) + +external length : bytes -> int = "%string_length" +(** Return the length (number of bytes) of the argument. *) + +external get : bytes -> int -> char = "%string_safe_get" +(** [get s n] returns the byte at index [n] in argument [s]. + + Raise [Invalid_argument] if [n] not a valid index in [s]. *) + +external set : bytes -> int -> char -> unit = "%string_safe_set" +(** [set s n c] modifies [s] in place, replacing the byte at index [n] + with [c]. + + Raise [Invalid_argument] if [n] is not a valid index in [s]. *) + +external create : int -> bytes = "caml_create_string" +(** [create n] returns a new byte sequence of length [n]. The + sequence is uninitialized and contains arbitrary bytes. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val make : int -> char -> bytes +(** [make n c] returns a new byte sequence of length [n], filled with + the byte [c]. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val init : int -> (int -> char) -> bytes +(** [Bytes.init n f] returns a fresh byte sequence of length [n], with + character [i] initialized to the result of [f i] (in increasing + index order). + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val empty : bytes +(** A byte sequence of size 0. *) + +val copy : bytes -> bytes +(** Return a new byte sequence that contains the same bytes as the + argument. *) + +val of_string : string -> bytes +(** Return a new byte sequence that contains the same bytes as the + given string. *) + +val to_string : bytes -> string +(** Return a new string that contains the same bytes as the given byte + sequence. *) + +val sub : bytes -> int -> int -> bytes +(** [sub s start len] returns a new byte sequence of length [len], + containing the subsequence of [s] that starts at position [start] + and has length [len]. + + Raise [Invalid_argument] if [start] and [len] do not designate a + valid range of [s]. *) + +val sub_string : bytes -> int -> int -> string +(** Same as [sub] but return a string instead of a byte sequence. *) + +val extend : bytes -> int -> int -> bytes +(** [extend s left right] returns a new byte sequence that contains + the bytes of [s], with [left] uninitialized bytes prepended and + [right] uninitialized bytes appended to it. If [left] or [right] + is negative, then bytes are removed (instead of appended) from + the corresponding side of [s]. + + Raise [Invalid_argument] if the result length is negative or + longer than {!Sys.max_string_length} bytes. *) + +val fill : bytes -> int -> int -> char -> unit +(** [fill s start len c] modifies [s] in place, replacing [len] + characters with [c], starting at [start]. + + Raise [Invalid_argument] if [start] and [len] do not designate a + valid range of [s]. *) + +val blit : bytes -> int -> bytes -> int -> int -> unit +(** [blit src srcoff dst dstoff len] copies [len] bytes from sequence + [src], starting at index [srcoff], to sequence [dst], starting at + index [dstoff]. It works correctly even if [src] and [dst] are the + same byte sequence, and the source and destination intervals + overlap. + + Raise [Invalid_argument] if [srcoff] and [len] do not + designate a valid range of [src], or if [dstoff] and [len] + do not designate a valid range of [dst]. *) + +val blit_string : string -> int -> bytes -> int -> int -> unit +(** [blit src srcoff dst dstoff len] copies [len] bytes from string + [src], starting at index [srcoff], to byte sequence [dst], + starting at index [dstoff]. + + Raise [Invalid_argument] if [srcoff] and [len] do not + designate a valid range of [src], or if [dstoff] and [len] + do not designate a valid range of [dst]. *) + +val concat : bytes -> bytes list -> bytes +(** [concat sep sl] concatenates the list of byte sequences [sl], + inserting the separator byte sequence [sep] between each, and + returns the result as a new byte sequence. + + Raise [Invalid_argument] if the result is longer than + {!Sys.max_string_length} bytes. *) + +val cat : bytes -> bytes -> bytes +(** [cat s1 s2] concatenates [s1] and [s2] and returns the result + as new byte sequence. + + Raise [Invalid_argument] if the result is longer than + {!Sys.max_string_length} bytes. *) + +val iter : (char -> unit) -> bytes -> unit +(** [iter f s] applies function [f] in turn to all the bytes of [s]. + It is equivalent to [f (get s 0); f (get s 1); ...; f (get s + (length s - 1)); ()]. *) + +val iteri : (int -> char -> unit) -> bytes -> unit +(** Same as {!Bytes.iter}, but the function is applied to the index of + the byte as first argument and the byte itself as second + argument. *) + +val map : (char -> char) -> bytes -> bytes +(** [map f s] applies function [f] in turn to all the bytes of [s] + (in increasing index order) and stores the resulting bytes in + a new sequence that is returned as the result. *) + +val mapi : (int -> char -> char) -> bytes -> bytes +(** [mapi f s] calls [f] with each character of [s] and its + index (in increasing index order) and stores the resulting bytes + in a new sequence that is returned as the result. *) + +val trim : bytes -> bytes +(** Return a copy of the argument, without leading and trailing + whitespace. The bytes regarded as whitespace are the ASCII + characters [' '], ['\012'], ['\n'], ['\r'], and ['\t']. *) + +val escaped : bytes -> bytes +(** Return a copy of the argument, with special characters represented + by escape sequences, following the lexical conventions of OCaml. + + Raise [Invalid_argument] if the result is longer than + {!Sys.max_string_length} bytes. *) + +val index : bytes -> char -> int +(** [index s c] returns the index of the first occurrence of byte [c] + in [s]. + + Raise [Not_found] if [c] does not occur in [s]. *) + +val rindex : bytes -> char -> int +(** [rindex s c] returns the index of the last occurrence of byte [c] + in [s]. + + Raise [Not_found] if [c] does not occur in [s]. *) + +val index_from : bytes -> int -> char -> int +(** [index_from s i c] returns the index of the first occurrence of + byte [c] in [s] after position [i]. [Bytes.index s c] is + equivalent to [Bytes.index_from s 0 c]. + + Raise [Invalid_argument] if [i] is not a valid position in [s]. + Raise [Not_found] if [c] does not occur in [s] after position [i]. *) + +val rindex_from : bytes -> int -> char -> int +(** [rindex_from s i c] returns the index of the last occurrence of + byte [c] in [s] before position [i+1]. [rindex s c] is equivalent + to [rindex_from s (Bytes.length s - 1) c]. + + Raise [Invalid_argument] if [i+1] is not a valid position in [s]. + Raise [Not_found] if [c] does not occur in [s] before position [i+1]. *) + +val contains : bytes -> char -> bool +(** [contains s c] tests if byte [c] appears in [s]. *) + +val contains_from : bytes -> int -> char -> bool +(** [contains_from s start c] tests if byte [c] appears in [s] after + position [start]. [contains s c] is equivalent to [contains_from + s 0 c]. + + Raise [Invalid_argument] if [start] is not a valid position in [s]. *) + +val rcontains_from : bytes -> int -> char -> bool +(** [rcontains_from s stop c] tests if byte [c] appears in [s] before + position [stop+1]. + + Raise [Invalid_argument] if [stop < 0] or [stop+1] is not a valid + position in [s]. *) + +val uppercase : bytes -> bytes +(** Return a copy of the argument, with all lowercase letters + translated to uppercase, including accented letters of the ISO + Latin-1 (8859-1) character set. *) + +val lowercase : bytes -> bytes +(** Return a copy of the argument, with all uppercase letters + translated to lowercase, including accented letters of the ISO + Latin-1 (8859-1) character set. *) + +val capitalize : bytes -> bytes +(** Return a copy of the argument, with the first byte set to + uppercase. *) + +val uncapitalize : bytes -> bytes +(** Return a copy of the argument, with the first byte set to + lowercase. *) + +type t = bytes +(** An alias for the type of byte sequences. *) + +val compare: t -> t -> int +(** The comparison function for byte sequences, with the same + specification as {!Pervasives.compare}. Along with the type [t], + this function [compare] allows the module [Bytes] to be passed as + argument to the functors {!Set.Make} and {!Map.Make}. *) + + +(** {4 Unsafe conversions (for advanced users)} + + This section describes unsafe, low-level conversion functions + between [bytes] and [string]. They do not copy the internal data; + used improperly, they can break the immutability invariant on + strings provided by the [-safe-string] option. They are available for + expert library authors, but for most purposes you should use the + always-correct {!Bytes.to_string} and {!Bytes.of_string} instead. +*) + +val unsafe_to_string : bytes -> string +(** Unsafely convert a byte sequence into a string. + + To reason about the use of [unsafe_to_string], it is convenient to + consider an "ownership" discipline. A piece of code that + manipulates some data "owns" it; there are several disjoint ownership + modes, including: + - Unique ownership: the data may be accessed and mutated + - Shared ownership: the data has several owners, that may only + access it, not mutate it. + + Unique ownership is linear: passing the data to another piece of + code means giving up ownership (we cannot write the + data again). A unique owner may decide to make the data shared + (giving up mutation rights on it), but shared data may not become + uniquely-owned again. + + [unsafe_to_string s] can only be used when the caller owns the byte + sequence [s] -- either uniquely or as shared immutable data. The + caller gives up ownership of [s], and gains ownership of the + returned string. + + There are two valid use-cases that respect this ownership + discipline: + + 1. Creating a string by initializing and mutating a byte sequence + that is never changed after initialization is performed. + + {[ +let string_init len f : string = + let s = Bytes.create len in + for i = 0 to len - 1 do Bytes.set s i (f i) done; + Bytes.unsafe_to_string s + ]} + + This function is safe because the byte sequence [s] will never be + accessed or mutated after [unsafe_to_string] is called. The + [string_init] code gives up ownership of [s], and returns the + ownership of the resulting string to its caller. + + Note that it would be unsafe if [s] was passed as an additional + parameter to the function [f] as it could escape this way and be + mutated in the future -- [string_init] would give up ownership of + [s] to pass it to [f], and could not call [unsafe_to_string] + safely. + + We have provided the {!String.init}, {!String.map} and + {!String.mapi} functions to cover most cases of building + new strings. You should prefer those over [to_string] or + [unsafe_to_string] whenever applicable. + + 2. Temporarily giving ownership of a byte sequence to a function + that expects a uniquely owned string and returns ownership back, so + that we can mutate the sequence again after the call ended. + + {[ +let bytes_length (s : bytes) = + String.length (Bytes.unsafe_to_string s) + ]} + + In this use-case, we do not promise that [s] will never be mutated + after the call to [bytes_length s]. The {!String.length} function + temporarily borrows unique ownership of the byte sequence + (and sees it as a [string]), but returns this ownership back to + the caller, which may assume that [s] is still a valid byte + sequence after the call. Note that this is only correct because we + know that {!String.length} does not capture its argument -- it could + escape by a side-channel such as a memoization combinator. + + The caller may not mutate [s] while the string is borrowed (it has + temporarily given up ownership). This affects concurrent programs, + but also higher-order functions: if [String.length] returned + a closure to be called later, [s] should not be mutated until this + closure is fully applied and returns ownership. +*) + +val unsafe_of_string : string -> bytes +(** Unsafely convert a shared string to a byte sequence that should + not be mutated. + + The same ownership discipline that makes [unsafe_to_string] + correct applies to [unsafe_of_string]: you may use it if you were + the owner of the [string] value, and you will own the return + [bytes] in the same mode. + + In practice, unique ownership of string values is extremely + difficult to reason about correctly. You should always assume + strings are shared, never uniquely owned. + + For example, string literals are implicitly shared by the + compiler, so you never uniquely own them. + + {[ +let incorrect = Bytes.unsafe_of_string "hello" +let s = Bytes.of_string "hello" + ]} + + The first declaration is incorrect, because the string literal + ["hello"] could be shared by the compiler with other parts of the + program, and mutating [incorrect] is a bug. You must always use + the second version, which performs a copy and is thus correct. + + Assuming unique ownership of strings that are not string + literals, but are (partly) built from string literals, is also + incorrect. For example, mutating [unsafe_of_string ("foo" ^ s)] + could mutate the shared string ["foo"] -- assuming a rope-like + representation of strings. More generally, functions operating on + strings will assume shared ownership, they do not preserve unique + ownership. It is thus incorrect to assume unique ownership of the + result of [unsafe_of_string]. + + The only case we have reasonable confidence is safe is if the + produced [bytes] is shared -- used as an immutable byte + sequence. This is possibly useful for incremental migration of + low-level programs that manipulate immutable sequences of bytes + (for example {!Marshal.from_bytes}) and previously used the + [string] type for this purpose. +*) + +(**/**) + +(* The following is for system use only. Do not call directly. *) + +external unsafe_get : bytes -> int -> char = "%string_unsafe_get" +external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" +external unsafe_blit : + bytes -> int -> bytes -> int -> int -> unit + = "caml_blit_string" "noalloc" +external unsafe_fill : + bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc" diff --git a/stdlib/bytesLabels.ml b/stdlib/bytesLabels.ml new file mode 100644 index 000000000..8ec8ec9e1 --- /dev/null +++ b/stdlib/bytesLabels.ml @@ -0,0 +1,16 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Jacques Garrigue, Kyoto University RIMS *) +(* *) +(* Copyright 2001 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(* Module [BytesLabels]: labelled Bytes module *) + +include Bytes diff --git a/stdlib/bytesLabels.mli b/stdlib/bytesLabels.mli new file mode 100644 index 000000000..d48d95f5c --- /dev/null +++ b/stdlib/bytesLabels.mli @@ -0,0 +1,213 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Byte sequence operations. *) + +external length : bytes -> int = "%string_length" +(** Return the length (number of bytes) of the argument. *) + +external get : bytes -> int -> char = "%string_safe_get" +(** [get s n] returns the byte at index [n] in argument [s]. + + Raise [Invalid_argument] if [n] not a valid index in [s]. *) + + +external set : bytes -> int -> char -> unit = "%string_safe_set" +(** [set s n c] modifies [s] in place, replacing the byte at index [n] + with [c]. + + Raise [Invalid_argument] if [n] is not a valid index in [s]. *) + +external create : int -> bytes = "caml_create_string" +(** [create n] returns a new byte sequence of length [n]. The + sequence is uninitialized and contains arbitrary bytes. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val make : int -> char -> bytes +(** [make n c] returns a new byte sequence of length [n], filled with + the byte [c]. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val init : int -> f:(int -> char) -> bytes +(** [init n f] returns a fresh byte sequence of length [n], + with character [i] initialized to the result of [f i]. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val empty : bytes +(** A byte sequence of size 0. *) + +val copy : bytes -> bytes +(** Return a new byte sequence that contains the same bytes as the + argument. *) + +val of_string : string -> bytes +(** Return a new byte sequence that contains the same bytes as the + given string. *) + +val to_string : bytes -> string +(** Return a new string that contains the same bytes as the given byte + sequence. *) + +val sub : bytes -> pos:int -> len:int -> bytes +(** [sub s start len] returns a new byte sequence of length [len], + containing the subsequence of [s] that starts at position [start] + and has length [len]. + + Raise [Invalid_argument] if [start] and [len] do not designate a + valid range of [s]. *) + +val sub_string : bytes -> int -> int -> string +(** Same as [sub] but return a string instead of a byte sequence. *) + +val fill : bytes -> pos:int -> len:int -> char -> unit +(** [fill s start len c] modifies [s] in place, replacing [len] + characters with [c], starting at [start]. + + Raise [Invalid_argument] if [start] and [len] do not designate a + valid range of [s]. *) + +val blit : + src:bytes -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int + -> unit +(** [blit src srcoff dst dstoff len] copies [len] bytes from sequence + [src], starting at index [srcoff], to sequence [dst], starting at + index [dstoff]. It works correctly even if [src] and [dst] are the + same byte sequence, and the source and destination intervals + overlap. + + Raise [Invalid_argument] if [srcoff] and [len] do not + designate a valid range of [src], or if [dstoff] and [len] + do not designate a valid range of [dst]. *) + +val concat : sep:bytes -> bytes list -> bytes +(** [concat sep sl] concatenates the list of byte sequences [sl], + inserting the separator byte sequence [sep] between each, and + returns the result as a new byte sequence. *) + +val iter : f:(char -> unit) -> bytes -> unit +(** [iter f s] applies function [f] in turn to all the bytes of [s]. + It is equivalent to [f (get s 0); f (get s 1); ...; f (get s + (length s - 1)); ()]. *) + +val iteri : f:(int -> char -> unit) -> bytes -> unit +(** Same as {!Bytes.iter}, but the function is applied to the index of + the byte as first argument and the byte itself as second + argument. *) + +val map : f:(char -> char) -> bytes -> bytes +(** [map f s] applies function [f] in turn to all the bytes of [s] and + stores the resulting bytes in a new sequence that is returned as + the result. *) + +val mapi : f:(int -> char -> char) -> bytes -> bytes +(** [mapi f s] calls [f] with each character of [s] and its + index (in increasing index order) and stores the resulting bytes + in a new sequence that is returned as the result. *) + +val trim : bytes -> bytes +(** Return a copy of the argument, without leading and trailing + whitespace. The bytes regarded as whitespace are the ASCII + characters [' '], ['\012'], ['\n'], ['\r'], and ['\t']. *) + +val escaped : bytes -> bytes +(** Return a copy of the argument, with special characters represented + by escape sequences, following the lexical conventions of OCaml. *) + +val index : bytes -> char -> int +(** [index s c] returns the index of the first occurrence of byte [c] + in [s]. + + Raise [Not_found] if [c] does not occur in [s]. *) + +val rindex : bytes -> char -> int +(** [rindex s c] returns the index of the last occurrence of byte [c] + in [s]. + + Raise [Not_found] if [c] does not occur in [s]. *) + +val index_from : bytes -> int -> char -> int +(** [index_from s i c] returns the index of the first occurrence of + byte [c] in [s] after position [i]. [Bytes.index s c] is + equivalent to [Bytes.index_from s 0 c]. + + Raise [Invalid_argument] if [i] is not a valid position in [s]. + Raise [Not_found] if [c] does not occur in [s] after position [i]. *) + +val rindex_from : bytes -> int -> char -> int +(** [rindex_from s i c] returns the index of the last occurrence of + byte [c] in [s] before position [i+1]. [rindex s c] is equivalent + to [rindex_from s (Bytes.length s - 1) c]. + + Raise [Invalid_argument] if [i+1] is not a valid position in [s]. + Raise [Not_found] if [c] does not occur in [s] before position [i+1]. *) + +val contains : bytes -> char -> bool +(** [contains s c] tests if byte [c] appears in [s]. *) + +val contains_from : bytes -> int -> char -> bool +(** [contains_from s start c] tests if byte [c] appears in [s] after + position [start]. [contains s c] is equivalent to [contains_from + s 0 c]. + + Raise [Invalid_argument] if [start] is not a valid position in [s]. *) + +val rcontains_from : bytes -> int -> char -> bool +(** [rcontains_from s stop c] tests if byte [c] appears in [s] before + position [stop+1]. + + Raise [Invalid_argument] if [stop < 0] or [stop+1] is not a valid + position in [s]. *) + +val uppercase : bytes -> bytes +(** Return a copy of the argument, with all lowercase letters + translated to uppercase, including accented letters of the ISO + Latin-1 (8859-1) character set. *) + +val lowercase : bytes -> bytes +(** Return a copy of the argument, with all uppercase letters + translated to lowercase, including accented letters of the ISO + Latin-1 (8859-1) character set. *) + +val capitalize : bytes -> bytes +(** Return a copy of the argument, with the first byte set to + uppercase. *) + +val uncapitalize : bytes -> bytes +(** Return a copy of the argument, with the first byte set to + lowercase. *) + +type t = bytes +(** An alias for the type of byte sequences. *) + +val compare: t -> t -> int +(** The comparison function for byte sequences, with the same + specification as {!Pervasives.compare}. Along with the type [t], + this function [compare] allows the module [Bytes] to be passed as + argument to the functors {!Set.Make} and {!Map.Make}. *) + +(**/**) + +(* The following is for system use only. Do not call directly. *) + +external unsafe_get : bytes -> int -> char = "%string_unsafe_get" +external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" +external unsafe_blit : + src:bytes -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int -> + unit = "caml_blit_string" "noalloc" +external unsafe_fill : + bytes -> pos:int -> len:int -> char -> unit = "caml_fill_string" "noalloc" +val unsafe_to_string : bytes -> string +val unsafe_of_string : string -> bytes diff --git a/stdlib/camlinternalFormat.ml b/stdlib/camlinternalFormat.ml new file mode 100644 index 000000000..7fb82dbe2 --- /dev/null +++ b/stdlib/camlinternalFormat.ml @@ -0,0 +1,2644 @@ +open CamlinternalFormatBasics + +(******************************************************************************) + (* Tools to manipulate scanning set of chars (see %[...]) *) + +type mutable_char_set = bytes + +(* Create a fresh, empty, mutable char set. *) +let create_char_set () = Bytes.make 32 '\000' + +(* Add a char in a mutable char set. *) +let add_in_char_set char_set c = + let ind = int_of_char c in + let str_ind = ind lsr 3 and mask = 1 lsl (ind land 0b111) in + Bytes.set char_set str_ind + (char_of_int (int_of_char (Bytes.get char_set str_ind) lor mask)) + +let freeze_char_set char_set = + Bytes.to_string char_set + +(* Compute the complement of a char set. *) +let rev_char_set char_set = + let char_set' = create_char_set () in + for i = 0 to 31 do + Bytes.set char_set' i + (char_of_int (int_of_char (String.get char_set i) lxor 0xFF)); + done; + Bytes.unsafe_to_string char_set' + +(* Return true if a `c' is in `char_set'. *) +let is_in_char_set char_set c = + let ind = int_of_char c in + let str_ind = ind lsr 3 and mask = 1 lsl (ind land 0b111) in + (int_of_char (String.get char_set str_ind) land mask) <> 0 + + +(******************************************************************************) + (* Ignored param conversion *) + +(* GADT used to abstract an existential type parameter. *) +(* See param_format_of_ignored_format. *) +type ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb = Param_format_EBB : + ('x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb + +(* Compute a padding associated to a pad_option (see "%_42d"). *) +let pad_of_pad_opt pad_opt = match pad_opt with + | None -> No_padding + | Some width -> Lit_padding (Right, width) + +(* Compute a precision associated to a prec_option (see "%_.42f"). *) +let prec_of_prec_opt prec_opt = match prec_opt with + | None -> No_precision + | Some ndec -> Lit_precision ndec + +(* Turn an ignored param into its equivalent not-ignored format node. *) +(* Used for format pretty-printing and Scanf. *) +let param_format_of_ignored_format : type a b c d e f x y . + (a, b, c, d, y, x) ignored -> (x, b, c, y, e, f) fmt -> + (a, b, c, d, e, f) param_format_ebb = +fun ign fmt -> match ign with + | Ignored_char -> + Param_format_EBB (Char fmt) + | Ignored_caml_char -> + Param_format_EBB (Caml_char fmt) + | Ignored_string pad_opt -> + Param_format_EBB (String (pad_of_pad_opt pad_opt, fmt)) + | Ignored_caml_string pad_opt -> + Param_format_EBB (Caml_string (pad_of_pad_opt pad_opt, fmt)) + | Ignored_int (iconv, pad_opt) -> + Param_format_EBB (Int (iconv, pad_of_pad_opt pad_opt, No_precision, fmt)) + | Ignored_int32 (iconv, pad_opt) -> + Param_format_EBB + (Int32 (iconv, pad_of_pad_opt pad_opt, No_precision, fmt)) + | Ignored_nativeint (iconv, pad_opt) -> + Param_format_EBB + (Nativeint (iconv, pad_of_pad_opt pad_opt, No_precision, fmt)) + | Ignored_int64 (iconv, pad_opt) -> + Param_format_EBB + (Int64 (iconv, pad_of_pad_opt pad_opt, No_precision, fmt)) + | Ignored_float (pad_opt, prec_opt) -> + Param_format_EBB + (Float (Float_f, pad_of_pad_opt pad_opt, prec_of_prec_opt prec_opt, fmt)) + | Ignored_bool -> + Param_format_EBB (Bool fmt) + | Ignored_format_arg (pad_opt, fmtty) -> + Param_format_EBB (Format_arg (pad_opt, fmtty, fmt)) + | Ignored_format_subst (pad_opt, fmtty) -> + Param_format_EBB + (Format_subst (pad_opt, fmtty, fmt)) + | Ignored_reader -> + Param_format_EBB (Reader fmt) + | Ignored_scan_char_set (width_opt, char_set) -> + Param_format_EBB (Scan_char_set (width_opt, char_set, fmt)) + | Ignored_scan_get_counter counter -> + Param_format_EBB (Scan_get_counter (counter, fmt)) + + +(******************************************************************************) + (* Types *) + +type ('b, 'c) acc_formatting_gen = + | Acc_open_tag of ('b, 'c) acc + | Acc_open_box of ('b, 'c) acc + +(* Reversed list of printing atoms. *) +(* Used to accumulate printf arguments. *) +and ('b, 'c) acc = + | Acc_formatting_lit of ('b, 'c) acc * formatting_lit (* Special fmtting (box) *) + | Acc_formatting_gen of ('b, 'c) acc * ('b, 'c) acc_formatting_gen (* Special fmtting (box) *) + | Acc_string_literal of ('b, 'c) acc * string (* Literal string *) + | Acc_char_literal of ('b, 'c) acc * char (* Literal char *) + | Acc_data_string of ('b, 'c) acc * string (* Generated string *) + | Acc_data_char of ('b, 'c) acc * char (* Generated char *) + | Acc_delay of ('b, 'c) acc * ('b -> 'c) (* Delayed printing (%a, %t) *) + | Acc_flush of ('b, 'c) acc (* Flush *) + | Acc_invalid_arg of ('b, 'c) acc * string (* Raise Invalid_argument msg *) + | End_of_acc + +(* List of heterogeneous values. *) +(* Used to accumulate scanf callback arguments. *) +type ('a, 'b) heter_list = + | Cons : 'c * ('a, 'b) heter_list -> ('c -> 'a, 'b) heter_list + | Nil : ('b, 'b) heter_list + +(* Existential Black Boxes. *) +(* Used to abstract some existential type parameters. *) + +(* GADT type associating a padding and an fmtty. *) +(* See the type_padding function. *) +type ('a, 'b, 'c, 'd, 'e, 'f) padding_fmtty_ebb = Padding_fmtty_EBB : + ('x, 'y) padding * ('y, 'b, 'c, 'd, 'e, 'f) fmtty -> + ('x, 'b, 'c, 'd, 'e, 'f) padding_fmtty_ebb + +(* GADT type associating a padding, a precision and an fmtty. *) +(* See the type_padprec function. *) +type ('a, 'b, 'c, 'd, 'e, 'f) padprec_fmtty_ebb = Padprec_fmtty_EBB : + ('x, 'y) padding * ('y, 'z) precision * ('z, 'b, 'c, 'd, 'e, 'f) fmtty -> + ('x, 'b, 'c, 'd, 'e, 'f) padprec_fmtty_ebb + +(* GADT type associating a padding and an fmt. *) +(* See make_padding_fmt_ebb and parse_format functions. *) +type ('a, 'b, 'c, 'e, 'f) padding_fmt_ebb = Padding_fmt_EBB : + (_, 'x -> 'a) padding * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'e, 'f) padding_fmt_ebb + +(* GADT type associating a precision and an fmt. *) +(* See make_precision_fmt_ebb and parse_format functions. *) +type ('a, 'b, 'c, 'e, 'f) precision_fmt_ebb = Precision_fmt_EBB : + (_, 'x -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'e, 'f) precision_fmt_ebb + +(* GADT type associating a padding, a precision and an fmt. *) +(* See make_padprec_fmt_ebb and parse_format functions. *) +type ('p, 'b, 'c, 'e, 'f) padprec_fmt_ebb = Padprec_fmt_EBB : + ('x, 'y) padding * ('y, 'p -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('p, 'b, 'c, 'e, 'f) padprec_fmt_ebb + +(* Abstract the 'a and 'd parameters of an fmt. *) +(* Output type of the format parsing function. *) +type ('b, 'c, 'e, 'f) fmt_ebb = Fmt_EBB : + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('b, 'c, 'e, 'f) fmt_ebb + +(* GADT type associating an fmtty and an fmt. *) +(* See the type_format_gen function. *) +type ('a, 'b, 'c, 'd, 'e, 'f) fmt_fmtty_ebb = Fmt_fmtty_EBB : + ('a, 'b, 'c, 'd, 'y, 'x) fmt * + ('x, 'b, 'c, 'y, 'e, 'f) fmtty -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt_fmtty_ebb + +(* GADT type associating an fmtty and an fmt. *) +(* See the type_ignored_format_substitution function. *) +type ('a, 'b, 'c, 'd, 'e, 'f) fmtty_fmt_ebb = Fmtty_fmt_EBB : + ('a, 'b, 'c, 'd, 'y, 'x) fmtty * + ('x, 'b, 'c, 'y, 'e, 'f) fmt_fmtty_ebb -> + ('a, 'b, 'c, 'd, 'e, 'f) fmtty_fmt_ebb + +(* Abstract all fmtty type parameters. *) +(* Used to compare format types. *) +type fmtty_ebb = Fmtty_EBB : ('a, 'b, 'c, 'd, 'e, 'f) fmtty -> fmtty_ebb + +(* Abstract all padding type parameters. *) +(* Used to compare paddings. *) +type padding_ebb = Padding_EBB : ('a, 'b) padding -> padding_ebb + +(* Abstract all precision type parameters. *) +(* Used to compare precisions. *) +type precision_ebb = Precision_EBB : ('a, 'b) precision -> precision_ebb + +(******************************************************************************) + (* Constants *) + +(* Default precision for float printing. *) +let default_float_precision = 6 + +(******************************************************************************) + (* Externals *) + +external format_float: string -> float -> string + = "caml_format_float" +external format_int: string -> int -> string + = "caml_format_int" +external format_int32: string -> int32 -> string + = "caml_int32_format" +external format_nativeint: string -> nativeint -> string + = "caml_nativeint_format" +external format_int64: string -> int64 -> string + = "caml_int64_format" + +(******************************************************************************) + (* Tools to pretty-print formats *) + +(* Type of extensible character buffers. *) +type buffer = { + mutable ind : int; + mutable bytes : bytes; +} + +(* Create a fresh buffer. *) +let buffer_create init_size = { ind = 0; bytes = Bytes.create init_size } + +(* Check size of the buffer and grow it if needed. *) +let buffer_check_size buf overhead = + let len = Bytes.length buf.bytes in + let min_len = buf.ind + overhead in + if min_len > len then ( + let new_len = max (len * 2) min_len in + let new_str = Bytes.create new_len in + Bytes.blit buf.bytes 0 new_str 0 len; + buf.bytes <- new_str; + ) + +(* Add the character `c' to the buffer `buf'. *) +let buffer_add_char buf c = + buffer_check_size buf 1; + Bytes.set buf.bytes buf.ind c; + buf.ind <- buf.ind + 1 + +(* Add the string `s' to the buffer `buf'. *) +let buffer_add_string buf s = + let str_len = String.length s in + buffer_check_size buf str_len; + String.blit s 0 buf.bytes buf.ind str_len; + buf.ind <- buf.ind + str_len + +(* Get the content of the buffer. *) +let buffer_contents buf = + Bytes.sub_string buf.bytes 0 buf.ind + +(***) + +(* Convert an integer conversion to char. *) +let char_of_iconv iconv = match iconv with + | Int_d | Int_pd | Int_sd -> 'd' | Int_i | Int_pi | Int_si -> 'i' + | Int_x | Int_Cx -> 'x' | Int_X | Int_CX -> 'X' | Int_o | Int_Co -> 'o' + | Int_u -> 'u' + +(* Convert a float conversion to char. *) +let char_of_fconv fconv = match fconv with + | Float_f | Float_pf | Float_sf -> 'f' | Float_e | Float_pe | Float_se -> 'e' + | Float_E | Float_pE | Float_sE -> 'E' | Float_g | Float_pg | Float_sg -> 'g' + | Float_G | Float_pG | Float_sG -> 'G' | Float_F -> 'F' + +(* Convert a scanning counter to char. *) +let char_of_counter counter = match counter with + | Line_counter -> 'l' + | Char_counter -> 'n' + | Token_counter -> 'N' + +(***) + +(* Print a char_set in a buffer with the OCaml format lexical convention. *) +let bprint_char_set buf char_set = + let rec print_start set = + let is_alone c = + let before, after = Char.(chr (code c - 1), chr (code c + 1)) in + is_in_char_set set c + && not (is_in_char_set set before && is_in_char_set set after) in + if is_alone ']' then buffer_add_char buf ']'; + print_out set 1; + if is_alone '-' then buffer_add_char buf '-'; + and print_out set i = + if i < 256 then + if is_in_char_set set (char_of_int i) then print_first set i + else print_out set (i + 1) + and print_first set i = + match char_of_int i with + | '\255' -> print_char buf 255; + | ']' | '-' -> print_out set (i + 1); + | _ -> print_second set (i + 1); + and print_second set i = + if is_in_char_set set (char_of_int i) then + match char_of_int i with + | '\255' -> + print_char buf 254; + print_char buf 255; + | ']' | '-' when not (is_in_char_set set (char_of_int (i + 1))) -> + print_char buf (i - 1); + print_out set (i + 1); + | _ when not (is_in_char_set set (char_of_int (i + 1))) -> + print_char buf (i - 1); + print_char buf i; + print_out set (i + 2); + | _ -> + print_in set (i - 1) (i + 2); + else ( + print_char buf (i - 1); + print_out set (i + 1); + ) + and print_in set i j = + if j = 256 || not (is_in_char_set set (char_of_int j)) then ( + print_char buf i; + print_char buf (int_of_char '-'); + print_char buf (j - 1); + if j < 256 then print_out set (j + 1); + ) else + print_in set i (j + 1); + and print_char buf i = match char_of_int i with + | '%' -> buffer_add_char buf '%'; buffer_add_char buf '%'; + | '@' -> buffer_add_char buf '%'; buffer_add_char buf '@'; + | c -> buffer_add_char buf c; + in + buffer_add_char buf '['; + print_start ( + if is_in_char_set char_set '\000' + then ( buffer_add_char buf '^'; rev_char_set char_set ) + else char_set + ); + buffer_add_char buf ']' + +(***) + +(* Print a padty in a buffer with the format-like syntax. *) +let bprint_padty buf padty = match padty with + | Left -> buffer_add_char buf '-' + | Right -> () + | Zeros -> buffer_add_char buf '0' + +(* Print the '_' of an ignored flag if needed. *) +let bprint_ignored_flag buf ign_flag = + if ign_flag then buffer_add_char buf '_' + +(***) + +let bprint_pad_opt buf pad_opt = match pad_opt with + | None -> () + | Some width -> buffer_add_string buf (string_of_int width) + +(***) + +(* Print padding in a buffer with the format-like syntax. *) +let bprint_padding : type a b . buffer -> (a, b) padding -> unit = +fun buf pad -> match pad with + | No_padding -> () + | Lit_padding (padty, n) -> + bprint_padty buf padty; + buffer_add_string buf (string_of_int n); + | Arg_padding padty -> + bprint_padty buf padty; + buffer_add_char buf '*' + +(* Print precision in a buffer with the format-like syntax. *) +let bprint_precision : type a b . buffer -> (a, b) precision -> unit = + fun buf prec -> match prec with + | No_precision -> () + | Lit_precision n -> + buffer_add_char buf '.'; + buffer_add_string buf (string_of_int n); + | Arg_precision -> + buffer_add_string buf ".*" + +(***) + +(* Print the optionnal '+', ' ' or '#' associated to an int conversion. *) +let bprint_iconv_flag buf iconv = match iconv with + | Int_pd | Int_pi -> buffer_add_char buf '+' + | Int_sd | Int_si -> buffer_add_char buf ' ' + | Int_Cx | Int_CX | Int_Co -> buffer_add_char buf '#' + | Int_d | Int_i | Int_x | Int_X | Int_o | Int_u -> () + +(* Print an complete int format in a buffer (ex: "%3.*d"). *) +let bprint_int_fmt buf ign_flag iconv pad prec = + buffer_add_char buf '%'; + bprint_ignored_flag buf ign_flag; + bprint_iconv_flag buf iconv; + bprint_padding buf pad; + bprint_precision buf prec; + buffer_add_char buf (char_of_iconv iconv) + +(* Print a complete int32, nativeint or int64 format in a buffer. *) +let bprint_altint_fmt buf ign_flag iconv pad prec c = + buffer_add_char buf '%'; + bprint_ignored_flag buf ign_flag; + bprint_iconv_flag buf iconv; + bprint_padding buf pad; + bprint_precision buf prec; + buffer_add_char buf c; + buffer_add_char buf (char_of_iconv iconv) + +(***) + +(* Print the optionnal '+' associated to a float conversion. *) +let bprint_fconv_flag buf fconv = match fconv with + | Float_pf | Float_pe | Float_pE | Float_pg | Float_pG -> + buffer_add_char buf '+' + | Float_sf | Float_se | Float_sE | Float_sg | Float_sG -> + buffer_add_char buf ' ' + | Float_f | Float_e | Float_E | Float_g | Float_G | Float_F -> + () + +(* Print a complete float format in a buffer (ex: "%+*.3f"). *) +let bprint_float_fmt buf ign_flag fconv pad prec = + buffer_add_char buf '%'; + bprint_ignored_flag buf ign_flag; + bprint_fconv_flag buf fconv; + bprint_padding buf pad; + bprint_precision buf prec; + buffer_add_char buf (char_of_fconv fconv) + +(* Compute the literal string representation of a formatting_lit. *) +(* Also used by Printf and Scanf where formatting is not interpreted. *) +let string_of_formatting_lit formatting_lit = match formatting_lit with + | Close_box -> "@]" + | Close_tag -> "@}" + | Break (str, _, _) -> str + | FFlush -> "@?" + | Force_newline -> "@\n" + | Flush_newline -> "@." + | Magic_size (str, _) -> str + | Escaped_at -> "@@" + | Escaped_percent -> "@%" + | Scan_indic c -> "@" ^ (String.make 1 c) + +(* Compute the literal string representation of a formatting. *) +(* Also used by Printf and Scanf where formatting is not interpreted. *) +let string_of_formatting_gen : type a b c d e f . + (a, b, c, d, e, f) formatting_gen -> string = + fun formatting_gen -> match formatting_gen with + | Open_tag (Format (_, str)) -> str + | Open_box (Format (_, str)) -> str + +(***) + +(* Print a literal char in a buffer, escape '%' by "%%". *) +let bprint_char_literal buf chr = match chr with + | '%' -> buffer_add_string buf "%%" + | _ -> buffer_add_char buf chr + +(* Print a literal string in a buffer, escape all '%' by "%%". *) +let bprint_string_literal buf str = + for i = 0 to String.length str - 1 do + bprint_char_literal buf str.[i] + done + +(******************************************************************************) + (* Format pretty-printing *) + +(* Print a complete format type (an fmtty) in a buffer. *) +let rec bprint_fmtty : type a b c d e f g h i j k l . + buffer -> (a, b, c, d, e, f, g, h, i, j, k, l) fmtty_rel -> unit = +fun buf fmtty -> match fmtty with + | Char_ty rest -> buffer_add_string buf "%c"; bprint_fmtty buf rest; + | String_ty rest -> buffer_add_string buf "%s"; bprint_fmtty buf rest; + | Int_ty rest -> buffer_add_string buf "%i"; bprint_fmtty buf rest; + | Int32_ty rest -> buffer_add_string buf "%li"; bprint_fmtty buf rest; + | Nativeint_ty rest -> buffer_add_string buf "%ni"; bprint_fmtty buf rest; + | Int64_ty rest -> buffer_add_string buf "%Li"; bprint_fmtty buf rest; + | Float_ty rest -> buffer_add_string buf "%f"; bprint_fmtty buf rest; + | Bool_ty rest -> buffer_add_string buf "%B"; bprint_fmtty buf rest; + | Alpha_ty rest -> buffer_add_string buf "%a"; bprint_fmtty buf rest; + | Theta_ty rest -> buffer_add_string buf "%t"; bprint_fmtty buf rest; + | Reader_ty rest -> buffer_add_string buf "%r"; bprint_fmtty buf rest; + + | Ignored_reader_ty rest -> + buffer_add_string buf "%_r"; + bprint_fmtty buf rest; + + | Format_arg_ty (sub_fmtty, rest) -> + buffer_add_string buf "%{"; bprint_fmtty buf sub_fmtty; + buffer_add_string buf "%}"; bprint_fmtty buf rest; + | Format_subst_ty (sub_fmtty, _, rest) -> + buffer_add_string buf "%("; bprint_fmtty buf sub_fmtty; + buffer_add_string buf "%)"; bprint_fmtty buf rest; + + | End_of_fmtty -> () + +(***) + +(* Print a complete format in a buffer. *) +let bprint_fmt buf fmt = + let rec fmtiter : type a b c d e f . + (a, b, c, d, e, f) fmt -> bool -> unit = + fun fmt ign_flag -> match fmt with + | String (pad, rest) -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + bprint_padding buf pad; buffer_add_char buf 's'; + fmtiter rest false; + | Caml_string (pad, rest) -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + bprint_padding buf pad; buffer_add_char buf 'S'; + fmtiter rest false; + + | Int (iconv, pad, prec, rest) -> + bprint_int_fmt buf ign_flag iconv pad prec; + fmtiter rest false; + | Int32 (iconv, pad, prec, rest) -> + bprint_altint_fmt buf ign_flag iconv pad prec 'l'; + fmtiter rest false; + | Nativeint (iconv, pad, prec, rest) -> + bprint_altint_fmt buf ign_flag iconv pad prec 'n'; + fmtiter rest false; + | Int64 (iconv, pad, prec, rest) -> + bprint_altint_fmt buf ign_flag iconv pad prec 'L'; + fmtiter rest false; + | Float (fconv, pad, prec, rest) -> + bprint_float_fmt buf ign_flag fconv pad prec; + fmtiter rest false; + + | Char rest -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + buffer_add_char buf 'c'; fmtiter rest false; + | Caml_char rest -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + buffer_add_char buf 'C'; fmtiter rest false; + | Bool rest -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + buffer_add_char buf 'B'; fmtiter rest false; + | Alpha rest -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + buffer_add_char buf 'a'; fmtiter rest false; + | Theta rest -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + buffer_add_char buf 't'; fmtiter rest false; + | Reader rest -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + buffer_add_char buf 'r'; fmtiter rest false; + | Flush rest -> + buffer_add_string buf "%!"; + fmtiter rest ign_flag; + + | String_literal (str, rest) -> + bprint_string_literal buf str; + fmtiter rest ign_flag; + | Char_literal (chr, rest) -> + bprint_char_literal buf chr; + fmtiter rest ign_flag; + + | Format_arg (pad_opt, fmtty, rest) -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + bprint_pad_opt buf pad_opt; buffer_add_char buf '{'; + bprint_fmtty buf fmtty; buffer_add_char buf '%'; buffer_add_char buf '}'; + fmtiter rest false; + | Format_subst (pad_opt, fmtty, rest) -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + bprint_pad_opt buf pad_opt; buffer_add_char buf '('; + bprint_fmtty buf fmtty; buffer_add_char buf '%'; buffer_add_char buf ')'; + fmtiter rest false; + + | Scan_char_set (width_opt, char_set, rest) -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + bprint_pad_opt buf width_opt; bprint_char_set buf char_set; + fmtiter rest false; + | Scan_get_counter (counter, rest) -> + buffer_add_char buf '%'; bprint_ignored_flag buf ign_flag; + buffer_add_char buf (char_of_counter counter); + fmtiter rest false; + | Ignored_param (ign, rest) -> + let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in + fmtiter fmt' true; + + | Formatting_lit (fmting_lit, rest) -> + bprint_string_literal buf (string_of_formatting_lit fmting_lit); + fmtiter rest ign_flag; + | Formatting_gen (fmting_gen, rest) -> + bprint_string_literal buf "@{"; + bprint_string_literal buf (string_of_formatting_gen fmting_gen); + fmtiter rest ign_flag; + + | End_of_format -> () + + in fmtiter fmt false + +(***) + +(* Convert a format to string. *) +let string_of_fmt fmt = + let buf = buffer_create 16 in + bprint_fmt buf fmt; + buffer_contents buf + +(******************************************************************************) + (* Type extraction *) + +type (_, _) eq = Refl : ('a, 'a) eq + +(* Invariant: this function is the identity on values. + + In particular, if (ty1, ty2) have equal values, then + (trans (symm ty1) ty2) respects the 'trans' precondition. *) +let rec symm : type a1 b1 c1 d1 e1 f1 a2 b2 c2 d2 e2 f2 . + (a1, b1, c1, d1, e1, f1, + a2, b2, c2, d2, e2, f2) fmtty_rel +-> (a2, b2, c2, d2, e2, f2, + a1, b1, c1, d1, e1, f1) fmtty_rel += function + | Char_ty rest -> Char_ty (symm rest) + | Int_ty rest -> Int_ty (symm rest) + | Int32_ty rest -> Int32_ty (symm rest) + | Int64_ty rest -> Int64_ty (symm rest) + | Nativeint_ty rest -> Nativeint_ty (symm rest) + | Float_ty rest -> Float_ty (symm rest) + | Bool_ty rest -> Bool_ty (symm rest) + | String_ty rest -> String_ty (symm rest) + | Theta_ty rest -> Theta_ty (symm rest) + | Alpha_ty rest -> Alpha_ty (symm rest) + | Reader_ty rest -> Reader_ty (symm rest) + | Ignored_reader_ty rest -> Ignored_reader_ty (symm rest) + | Format_arg_ty (ty, rest) -> + Format_arg_ty (ty, symm rest) + | Format_subst_ty (ty1, ty2, rest) -> + Format_subst_ty (ty2, ty1, symm rest) + | End_of_fmtty -> End_of_fmtty + +let rec fmtty_rel_det : type a1 b c d1 e1 f1 a2 d2 e2 f2 . + (a1, b, c, d1, e1, f1, + a2, b, c, d2, e2, f2) fmtty_rel -> + ((f1, f2) eq -> (a1, a2) eq) + * ((a1, a2) eq -> (f1, f2) eq) + * ((e1, e2) eq -> (d1, d2) eq) + * ((d1, d2) eq -> (e1, e2) eq) += function + | End_of_fmtty -> + (fun Refl -> Refl), + (fun Refl -> Refl), + (fun Refl -> Refl), + (fun Refl -> Refl) + | Char_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | String_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Int_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Int32_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Int64_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Nativeint_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Float_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Bool_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + + | Theta_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Alpha_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Reader_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + (fun Refl -> let Refl = ed Refl in Refl), + (fun Refl -> let Refl = de Refl in Refl) + | Ignored_reader_ty rest -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + (fun Refl -> let Refl = ed Refl in Refl), + (fun Refl -> let Refl = de Refl in Refl) + | Format_arg_ty (_ty, rest) -> + let fa, af, ed, de = fmtty_rel_det rest in + (fun Refl -> let Refl = fa Refl in Refl), + (fun Refl -> let Refl = af Refl in Refl), + ed, de + | Format_subst_ty (ty1, ty2, rest) -> + let fa, af, ed, de = fmtty_rel_det rest in + let ty = trans (symm ty1) ty2 in + let ag, ga, dj, jd = fmtty_rel_det ty in + (fun Refl -> let Refl = fa Refl in let Refl = ag Refl in Refl), + (fun Refl -> let Refl = ga Refl in let Refl = af Refl in Refl), + (fun Refl -> let Refl = ed Refl in let Refl = dj Refl in Refl), + (fun Refl -> let Refl = jd Refl in let Refl = de Refl in Refl) + +(* Precondition: we assume that the two fmtty_rel arguments have equal + values (at possibly distinct types); this invariant comes from the way + fmtty_rel witnesses are produced by the type-checker + + The code below uses (assert false) when this assumption is broken. The + code pattern is the following: + + | Foo x, Foo y -> + (* case where indeed both values + start with constructor Foo *) + | Foo _, _ + | _, Foo _ -> + (* different head constructors: broken precondition *) + assert false +*) +and trans : type + a1 b1 c1 d1 e1 f1 + a2 b2 c2 d2 e2 f2 + a3 b3 c3 d3 e3 f3 +. + (a1, b1, c1, d1, e1, f1, + a2, b2, c2, d2, e2, f2) fmtty_rel +-> (a2, b2, c2, d2, e2, f2, + a3, b3, c3, d3, e3, f3) fmtty_rel +-> (a1, b1, c1, d1, e1, f1, + a3, b3, c3, d3, e3, f3) fmtty_rel += fun ty1 ty2 -> match ty1, ty2 with + | Char_ty rest1, Char_ty rest2 -> Char_ty (trans rest1 rest2) + | String_ty rest1, String_ty rest2 -> String_ty (trans rest1 rest2) + | Bool_ty rest1, Bool_ty rest2 -> Bool_ty (trans rest1 rest2) + | Int_ty rest1, Int_ty rest2 -> Int_ty (trans rest1 rest2) + | Int32_ty rest1, Int32_ty rest2 -> Int32_ty (trans rest1 rest2) + | Int64_ty rest1, Int64_ty rest2 -> Int64_ty (trans rest1 rest2) + | Nativeint_ty rest1, Nativeint_ty rest2 -> Nativeint_ty (trans rest1 rest2) + | Float_ty rest1, Float_ty rest2 -> Float_ty (trans rest1 rest2) + + | Alpha_ty rest1, Alpha_ty rest2 -> Alpha_ty (trans rest1 rest2) + | Alpha_ty _, _ -> assert false + | _, Alpha_ty _ -> assert false + + | Theta_ty rest1, Theta_ty rest2 -> Theta_ty (trans rest1 rest2) + | Theta_ty _, _ -> assert false + | _, Theta_ty _ -> assert false + + | Reader_ty rest1, Reader_ty rest2 -> Reader_ty (trans rest1 rest2) + | Reader_ty _, _ -> assert false + | _, Reader_ty _ -> assert false + + | Ignored_reader_ty rest1, Ignored_reader_ty rest2 -> + Ignored_reader_ty (trans rest1 rest2) + | Ignored_reader_ty _, _ -> assert false + | _, Ignored_reader_ty _ -> assert false + + | Format_arg_ty (ty1, rest1), Format_arg_ty (ty2, rest2) -> + Format_arg_ty (trans ty1 ty2, trans rest1 rest2) + | Format_arg_ty _, _ -> assert false + | _, Format_arg_ty _ -> assert false + + | Format_subst_ty (ty11, ty12, rest1), + Format_subst_ty (ty21, ty22, rest2) -> + let ty = trans (symm ty12) ty21 in + let _, f2, _, f4 = fmtty_rel_det ty in + let Refl = f2 Refl in + let Refl = f4 Refl in + Format_subst_ty (ty11, ty22, trans rest1 rest2) + | Format_subst_ty _, _ -> assert false + | _, Format_subst_ty _ -> assert false + + | End_of_fmtty, End_of_fmtty -> End_of_fmtty + | End_of_fmtty, _ -> assert false + | _, End_of_fmtty -> assert false + +let rec fmtty_of_formatting_gen : type a b c d e f . + (a, b, c, d, e, f) formatting_gen -> + (a, b, c, d, e, f) fmtty = +fun formatting_gen -> match formatting_gen with + | Open_tag (Format (fmt, _)) -> fmtty_of_fmt fmt + | Open_box (Format (fmt, _)) -> fmtty_of_fmt fmt + +(* Extract the type representation (an fmtty) of a format. *) +and fmtty_of_fmt : type a b c d e f . + (a, b, c, d, e, f) fmt -> (a, b, c, d, e, f) fmtty = +fun fmtty -> match fmtty with + | String (pad, rest) -> + fmtty_of_padding_fmtty pad (String_ty (fmtty_of_fmt rest)) + | Caml_string (pad, rest) -> + fmtty_of_padding_fmtty pad (String_ty (fmtty_of_fmt rest)) + + | Int (_, pad, prec, rest) -> + let ty_rest = fmtty_of_fmt rest in + let prec_ty = fmtty_of_precision_fmtty prec (Int_ty ty_rest) in + fmtty_of_padding_fmtty pad prec_ty + | Int32 (_, pad, prec, rest) -> + let ty_rest = fmtty_of_fmt rest in + let prec_ty = fmtty_of_precision_fmtty prec (Int32_ty ty_rest) in + fmtty_of_padding_fmtty pad prec_ty + | Nativeint (_, pad, prec, rest) -> + let ty_rest = fmtty_of_fmt rest in + let prec_ty = fmtty_of_precision_fmtty prec (Nativeint_ty ty_rest) in + fmtty_of_padding_fmtty pad prec_ty + | Int64 (_, pad, prec, rest) -> + let ty_rest = fmtty_of_fmt rest in + let prec_ty = fmtty_of_precision_fmtty prec (Int64_ty ty_rest) in + fmtty_of_padding_fmtty pad prec_ty + | Float (_, pad, prec, rest) -> + let ty_rest = fmtty_of_fmt rest in + let prec_ty = fmtty_of_precision_fmtty prec (Float_ty ty_rest) in + fmtty_of_padding_fmtty pad prec_ty + + | Char rest -> Char_ty (fmtty_of_fmt rest) + | Caml_char rest -> Char_ty (fmtty_of_fmt rest) + | Bool rest -> Bool_ty (fmtty_of_fmt rest) + | Alpha rest -> Alpha_ty (fmtty_of_fmt rest) + | Theta rest -> Theta_ty (fmtty_of_fmt rest) + | Reader rest -> Reader_ty (fmtty_of_fmt rest) + + | Format_arg (_, ty, rest) -> + Format_arg_ty (ty, fmtty_of_fmt rest) + | Format_subst (_, ty, rest) -> + Format_subst_ty (ty, ty, fmtty_of_fmt rest) + + | Flush rest -> fmtty_of_fmt rest + | String_literal (_, rest) -> fmtty_of_fmt rest + | Char_literal (_, rest) -> fmtty_of_fmt rest + + | Scan_char_set (_, _, rest) -> String_ty (fmtty_of_fmt rest) + | Scan_get_counter (_, rest) -> Int_ty (fmtty_of_fmt rest) + | Ignored_param (ign, rest) -> fmtty_of_ignored_format ign rest + | Formatting_lit (_, rest) -> fmtty_of_fmt rest + | Formatting_gen (fmting_gen, rest) -> + concat_fmtty (fmtty_of_formatting_gen fmting_gen) (fmtty_of_fmt rest) + + | End_of_format -> End_of_fmtty + +(* Extract the fmtty of an ignored parameter followed by the rest of + the format. *) +and fmtty_of_ignored_format : type x y a b c d e f . + (a, b, c, d, y, x) ignored -> + (x, b, c, y, e, f) fmt -> + (a, b, c, d, e, f) fmtty = +fun ign fmt -> match ign with + | Ignored_char -> fmtty_of_fmt fmt + | Ignored_caml_char -> fmtty_of_fmt fmt + | Ignored_string _ -> fmtty_of_fmt fmt + | Ignored_caml_string _ -> fmtty_of_fmt fmt + | Ignored_int (_, _) -> fmtty_of_fmt fmt + | Ignored_int32 (_, _) -> fmtty_of_fmt fmt + | Ignored_nativeint (_, _) -> fmtty_of_fmt fmt + | Ignored_int64 (_, _) -> fmtty_of_fmt fmt + | Ignored_float (_, _) -> fmtty_of_fmt fmt + | Ignored_bool -> fmtty_of_fmt fmt + | Ignored_format_arg _ -> fmtty_of_fmt fmt + | Ignored_format_subst (_, fmtty) -> concat_fmtty fmtty (fmtty_of_fmt fmt) + | Ignored_reader -> Ignored_reader_ty (fmtty_of_fmt fmt) + | Ignored_scan_char_set _ -> fmtty_of_fmt fmt + | Ignored_scan_get_counter _ -> fmtty_of_fmt fmt + +(* Add an Int_ty node if padding is taken as an extra argument (ex: "%*s"). *) +and fmtty_of_padding_fmtty : type x a b c d e f . + (x, a) padding -> (a, b, c, d, e, f) fmtty -> (x, b, c, d, e, f) fmtty = + fun pad fmtty -> match pad with + | No_padding -> fmtty + | Lit_padding _ -> fmtty + | Arg_padding _ -> Int_ty fmtty + +(* Add an Int_ty node if precision is taken as an extra argument (ex: "%.*f").*) +and fmtty_of_precision_fmtty : type x a b c d e f . + (x, a) precision -> (a, b, c, d, e, f) fmtty -> (x, b, c, d, e, f) fmtty = + fun prec fmtty -> match prec with + | No_precision -> fmtty + | Lit_precision _ -> fmtty + | Arg_precision -> Int_ty fmtty + +(******************************************************************************) + (* Format typing *) + +(* Exception raised when a format does not match a given format type. *) +exception Type_mismatch + +(* Type a padding. *) +(* Take an Int_ty from the fmtty if the integer should be kept as argument. *) +(* Raise Type_mismatch in case of type mismatch. *) +let type_padding : type a b c d e f x y . + (x, y) padding -> (a, b, c, d, e, f) fmtty -> + (a, b, c, d, e, f) padding_fmtty_ebb = +fun pad fmtty -> match pad, fmtty with + | No_padding, _ -> Padding_fmtty_EBB (No_padding, fmtty) + | Lit_padding (padty, w), _ -> Padding_fmtty_EBB (Lit_padding (padty,w),fmtty) + | Arg_padding padty, Int_ty rest -> Padding_fmtty_EBB (Arg_padding padty,rest) + | _ -> raise Type_mismatch + +(* Convert a (upadding, uprecision) to a (padding, precision). *) +(* Take one or two Int_ty from the fmtty if needed. *) +(* Raise Type_mismatch in case of type mismatch. *) +let type_padprec : type a b c d e f x y z . + (x, y) padding -> (y, z) precision -> (a, b, c, d, e, f) fmtty -> + (a, b, c, d, e, f) padprec_fmtty_ebb = +fun pad prec fmtty -> match prec, type_padding pad fmtty with + | No_precision, Padding_fmtty_EBB (pad, rest) -> + Padprec_fmtty_EBB (pad, No_precision, rest) + | Lit_precision p, Padding_fmtty_EBB (pad, rest) -> + Padprec_fmtty_EBB (pad, Lit_precision p, rest) + | Arg_precision, Padding_fmtty_EBB (pad, Int_ty rest) -> + Padprec_fmtty_EBB (pad, Arg_precision, rest) + | _, Padding_fmtty_EBB (_, _) -> raise Type_mismatch + +(* Type a format according to an fmtty. *) +(* If typing succeed, generate a copy of the format with the same + type parameters as the fmtty. *) +(* Raise a Failure with an error message in case of type mismatch. *) +let rec type_format : + type a1 b1 c1 d1 e1 f1 + a2 b2 c2 d2 e2 f2 . + (a1, b1, c1, d1, e1, f1) fmt + -> (a2, b2, c2, d2, e2, f2) fmtty + -> (a2, b2, c2, d2, e2, f2) fmt += fun fmt fmtty -> match type_format_gen fmt fmtty with + | Fmt_fmtty_EBB (fmt', End_of_fmtty) -> fmt' + | _ -> raise Type_mismatch + +and type_format_gen : + type a1 b1 c1 d1 e1 f1 + a2 b2 c2 d2 e2 f2 . + (a1, b1, c1, d1, e1, f1) fmt + -> (a2, b2, c2, d2, e2, f2) fmtty + -> (a2, b2, c2, d2, e2, f2) fmt_fmtty_ebb += fun fmt fmtty -> match fmt, fmtty with + | Char fmt_rest, Char_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Char fmt', fmtty') + | Caml_char fmt_rest, Char_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Caml_char fmt', fmtty') + | String (pad, fmt_rest), _ -> ( + match type_padding pad fmtty with + | Padding_fmtty_EBB (pad, String_ty fmtty_rest) -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (String (pad, fmt'), fmtty') + | Padding_fmtty_EBB (_, _) -> raise Type_mismatch + ) + | Caml_string (pad, fmt_rest), _ -> ( + match type_padding pad fmtty with + | Padding_fmtty_EBB (pad, String_ty fmtty_rest) -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Caml_string (pad, fmt'), fmtty') + | Padding_fmtty_EBB (_, _) -> raise Type_mismatch + ) + | Int (iconv, pad, prec, fmt_rest), _ -> ( + match type_padprec pad prec fmtty with + | Padprec_fmtty_EBB (pad, prec, Int_ty fmtty_rest) -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Int (iconv, pad, prec, fmt'), fmtty') + | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch + ) + | Int32 (iconv, pad, prec, fmt_rest), _ -> ( + match type_padprec pad prec fmtty with + | Padprec_fmtty_EBB (pad, prec, Int32_ty fmtty_rest) -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Int32 (iconv, pad, prec, fmt'), fmtty') + | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch + ) + | Nativeint (iconv, pad, prec, fmt_rest), _ -> ( + match type_padprec pad prec fmtty with + | Padprec_fmtty_EBB (pad, prec, Nativeint_ty fmtty_rest) -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Nativeint (iconv, pad, prec, fmt'), fmtty') + | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch + ) + | Int64 (iconv, pad, prec, fmt_rest), _ -> ( + match type_padprec pad prec fmtty with + | Padprec_fmtty_EBB (pad, prec, Int64_ty fmtty_rest) -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Int64 (iconv, pad, prec, fmt'), fmtty') + | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch + ) + | Float (fconv, pad, prec, fmt_rest), _ -> ( + match type_padprec pad prec fmtty with + | Padprec_fmtty_EBB (pad, prec, Float_ty fmtty_rest) -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Float (fconv, pad, prec, fmt'), fmtty') + | Padprec_fmtty_EBB (_, _, _) -> raise Type_mismatch + ) + | Bool fmt_rest, Bool_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Bool fmt', fmtty') + | Flush fmt_rest, fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Flush fmt', fmtty') + + | String_literal (str, fmt_rest), fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (String_literal (str, fmt'), fmtty') + | Char_literal (chr, fmt_rest), fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Char_literal (chr, fmt'), fmtty') + + | Format_arg (pad_opt, sub_fmtty, fmt_rest), + Format_arg_ty (sub_fmtty', fmtty_rest) -> + if Fmtty_EBB sub_fmtty <> Fmtty_EBB sub_fmtty' then raise Type_mismatch; + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Format_arg (pad_opt, sub_fmtty', fmt'), fmtty') + | Format_subst (pad_opt, sub_fmtty, fmt_rest), + Format_subst_ty (sub_fmtty1, _sub_fmtty2, fmtty_rest) -> + if Fmtty_EBB (erase_rel sub_fmtty) <> Fmtty_EBB (erase_rel sub_fmtty1) then + raise Type_mismatch; + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest (erase_rel fmtty_rest) in + Fmt_fmtty_EBB (Format_subst (pad_opt, sub_fmtty1, fmt'), fmtty') + (* Printf and Format specific constructors: *) + | Alpha fmt_rest, Alpha_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Alpha fmt', fmtty') + | Theta fmt_rest, Theta_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Theta fmt', fmtty') + + (* Format specific constructors: *) + | Formatting_lit (formatting_lit, fmt_rest), fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Formatting_lit (formatting_lit, fmt'), fmtty') + | Formatting_gen (formatting_gen, fmt_rest), fmtty_rest -> + type_formatting_gen formatting_gen fmt_rest fmtty_rest + + (* Scanf specific constructors: *) + | Reader fmt_rest, Reader_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Reader fmt', fmtty') + | Scan_char_set (width_opt, char_set, fmt_rest), String_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Scan_char_set (width_opt, char_set, fmt'), fmtty') + | Scan_get_counter (counter, fmt_rest), Int_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt_rest fmtty_rest in + Fmt_fmtty_EBB (Scan_get_counter (counter, fmt'), fmtty') + | Ignored_param (ign, rest), fmtty_rest -> + type_ignored_param ign rest fmtty_rest + + | End_of_format, fmtty_rest -> Fmt_fmtty_EBB (End_of_format, fmtty_rest) + + | _ -> raise Type_mismatch + +and type_formatting_gen : type a1 a3 b1 b3 c1 c3 d1 d3 e1 e2 e3 f1 f2 f3 . + (a1, b1, c1, d1, e1, f1) formatting_gen -> + (f1, b1, c1, e1, e2, f2) fmt -> + (a3, b3, c3, d3, e3, f3) fmtty -> + (a3, b3, c3, d3, e3, f3) fmt_fmtty_ebb = +fun formatting_gen fmt0 fmtty0 -> match formatting_gen with + | Open_tag (Format (fmt1, str)) -> + let Fmt_fmtty_EBB (fmt2, fmtty2) = type_format_gen fmt1 fmtty0 in + let Fmt_fmtty_EBB (fmt3, fmtty3) = type_format_gen fmt0 fmtty2 in + Fmt_fmtty_EBB (Formatting_gen (Open_tag (Format (fmt2, str)), fmt3), fmtty3) + | Open_box (Format (fmt1, str)) -> + let Fmt_fmtty_EBB (fmt2, fmtty2) = type_format_gen fmt1 fmtty0 in + let Fmt_fmtty_EBB (fmt3, fmtty3) = type_format_gen fmt0 fmtty2 in + Fmt_fmtty_EBB (Formatting_gen (Open_tag (Format (fmt2, str)), fmt3), fmtty3) + +(* Type an Ignored_param node according to an fmtty. *) +and type_ignored_param : type p q x y z t u v a b c d e f . + (x, y, z, t, q, p) ignored -> + (p, y, z, q, u, v) fmt -> + (a, b, c, d, e, f) fmtty -> + (a, b, c, d, e, f) fmt_fmtty_ebb = +fun ign fmt fmtty -> match ign with + | Ignored_char as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_caml_char as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_string _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_caml_string _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_int _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_int32 _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_nativeint _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_int64 _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_float _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_bool as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_scan_char_set _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_scan_get_counter _ as ign' -> type_ignored_param_one ign' fmt fmtty + | Ignored_format_arg (pad_opt, sub_fmtty) -> + type_ignored_param_one (Ignored_format_arg (pad_opt, sub_fmtty)) fmt fmtty + | Ignored_format_subst (pad_opt, sub_fmtty) -> + let Fmtty_fmt_EBB (sub_fmtty', Fmt_fmtty_EBB (fmt', fmtty')) = + type_ignored_format_substitution sub_fmtty fmt fmtty in + Fmt_fmtty_EBB (Ignored_param (Ignored_format_subst (pad_opt, sub_fmtty'), fmt'), fmtty') + | Ignored_reader -> ( + match fmtty with + | Ignored_reader_ty fmtty_rest -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt fmtty_rest in + Fmt_fmtty_EBB (Ignored_param (Ignored_reader, fmt'), fmtty') + | _ -> raise Type_mismatch + ) + +and type_ignored_param_one : type a1 a2 b1 b2 c1 c2 d1 d2 e1 e2 f1 f2 . + (a2, b2, c2, d2, d2, a2) ignored -> + (a1, b1, c1, d1, e1, f1) fmt -> + (a2, b2, c2, d2, e2, f2) fmtty -> + (a2, b2, c2, d2, e2, f2) fmt_fmtty_ebb += fun ign fmt fmtty -> + let Fmt_fmtty_EBB (fmt', fmtty') = type_format_gen fmt fmtty in + Fmt_fmtty_EBB (Ignored_param (ign, fmt'), fmtty') + +(* Typing of the complex case: "%_(...%)". *) +and type_ignored_format_substitution : type w x y z p s t u a b c d e f . + (w, x, y, z, s, p) fmtty -> + (p, x, y, s, t, u) fmt -> + (a, b, c, d, e, f) fmtty -> (a, b, c, d, e, f) fmtty_fmt_ebb = +fun sub_fmtty fmt fmtty -> match sub_fmtty, fmtty with + | Char_ty sub_fmtty_rest, Char_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Char_ty sub_fmtty_rest', fmt') + | String_ty sub_fmtty_rest, String_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (String_ty sub_fmtty_rest', fmt') + | Int_ty sub_fmtty_rest, Int_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Int_ty sub_fmtty_rest', fmt') + | Int32_ty sub_fmtty_rest, Int32_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Int32_ty sub_fmtty_rest', fmt') + | Nativeint_ty sub_fmtty_rest, Nativeint_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Nativeint_ty sub_fmtty_rest', fmt') + | Int64_ty sub_fmtty_rest, Int64_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Int64_ty sub_fmtty_rest', fmt') + | Float_ty sub_fmtty_rest, Float_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Float_ty sub_fmtty_rest', fmt') + | Bool_ty sub_fmtty_rest, Bool_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Bool_ty sub_fmtty_rest', fmt') + | Alpha_ty sub_fmtty_rest, Alpha_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Alpha_ty sub_fmtty_rest', fmt') + | Theta_ty sub_fmtty_rest, Theta_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Theta_ty sub_fmtty_rest', fmt') + | Reader_ty sub_fmtty_rest, Reader_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Reader_ty sub_fmtty_rest', fmt') + | Ignored_reader_ty sub_fmtty_rest, Ignored_reader_ty fmtty_rest -> + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Ignored_reader_ty sub_fmtty_rest', fmt') + + | Format_arg_ty (sub2_fmtty, sub_fmtty_rest), + Format_arg_ty (sub2_fmtty', fmtty_rest) -> + if Fmtty_EBB sub2_fmtty <> Fmtty_EBB sub2_fmtty' then raise Type_mismatch; + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution sub_fmtty_rest fmt fmtty_rest in + Fmtty_fmt_EBB (Format_arg_ty (sub2_fmtty', sub_fmtty_rest'), fmt') + | Format_subst_ty (sub1_fmtty, sub2_fmtty, sub_fmtty_rest), + Format_subst_ty (sub1_fmtty', sub2_fmtty', fmtty_rest) -> + (* TODO define Fmtty_rel_EBB to remove those erase_rel *) + if Fmtty_EBB (erase_rel sub1_fmtty) <> Fmtty_EBB (erase_rel sub1_fmtty') then raise Type_mismatch; + if Fmtty_EBB (erase_rel sub2_fmtty) <> Fmtty_EBB (erase_rel sub2_fmtty') then raise Type_mismatch; + let sub_fmtty' = trans (symm sub1_fmtty') sub2_fmtty' in + let _, f2, _, f4 = fmtty_rel_det sub_fmtty' in + let Refl = f2 Refl in + let Refl = f4 Refl in + let Fmtty_fmt_EBB (sub_fmtty_rest', fmt') = + type_ignored_format_substitution (erase_rel sub_fmtty_rest) fmt fmtty_rest in + Fmtty_fmt_EBB (Format_subst_ty (sub1_fmtty', sub2_fmtty', symm sub_fmtty_rest'), fmt') + | End_of_fmtty, fmtty -> + Fmtty_fmt_EBB (End_of_fmtty, type_format_gen fmt fmtty) + | _ -> raise Type_mismatch + +(* This implementation of `recast` is a bit disappointing. The + invariant provided by the type are very strong: the input format's + type is in relation to the output type's as witnessed by the + fmtty_rel argument. One would at first expect this function to be + total, and implementable by exhaustive pattern matching. Instead, + we reuse the highly partial and much less well-defined function + `type_format` that has lost all knowledge of the correspondence + between the argument's types. + + Besides the fact that this function reuses a lot of the + `type_format` logic (eg.: seeing Int_ty in the fmtty parameter does + not let you match on Int only, as you may in fact have Float + (Arg_padding, ...) ("%.*d") beginning with an Int_ty), it is also + a partial function, because the typing information in a format is + not quite enough to reconstruct it unambiguously. For example, the + format types of "%d%_r" and "%_r%d" have the same format6 + parameters, but they are not at all exchangeable, and putting one + in place of the other must result in a dynamic failure. + + Given that: + - we'd have to duplicate a lot of non-trivial typing logic from type_format + - this wouldn't even eliminate (all) the dynamic failures + we decided to just reuse type_format directly for now. +*) +let recast : + type a1 b1 c1 d1 e1 f1 + a2 b2 c2 d2 e2 f2 + . + (a1, b1, c1, d1, e1, f1) fmt + -> (a1, b1, c1, d1, e1, f1, + a2, b2, c2, d2, e2, f2) fmtty_rel + -> (a2, b2, c2, d2, e2, f2) fmt += fun fmt fmtty -> + type_format fmt (erase_rel (symm fmtty)) + +(******************************************************************************) + (* Printing tools *) + +(* Add padding spaces arround a string. *) +let fix_padding padty width str = + let len = String.length str in + if width <= len then str else + let res = Bytes.make width (if padty = Zeros then '0' else ' ') in + begin match padty with + | Left -> String.blit str 0 res 0 len + | Right -> String.blit str 0 res (width - len) len + | Zeros when len > 0 && (str.[0] = '+' || str.[0] = '-' || str.[0] = ' ') -> + Bytes.set res 0 str.[0]; + String.blit str 1 res (width - len + 1) (len - 1) + | Zeros when len > 1 && str.[0] = '0' && (str.[1] = 'x' || str.[1] = 'X') -> + Bytes.set res 1 str.[1]; + String.blit str 2 res (width - len + 2) (len - 2) + | Zeros -> + String.blit str 0 res (width - len) len + end; + Bytes.unsafe_to_string res + +(* Add '0' padding to int, int32, nativeint or int64 string representation. *) +let fix_int_precision prec str = + let len = String.length str in + if prec <= len then str else + let res = Bytes.make prec '0' in + begin match str.[0] with + | ('+' | '-' | ' ') as c -> + Bytes.set res 0 c; + String.blit str 1 res (prec - len + 1) (len - 1); + | '0' when len > 1 && (str.[1] = 'x' || str.[1] = 'X') -> + Bytes.set res 1 str.[1]; + String.blit str 2 res (prec - len + 2) (len - 2); + | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> + String.blit str 0 res (prec - len) len; + | _ -> + assert false + end; + Bytes.unsafe_to_string res + +(* Escape a string according to the OCaml lexing convention. *) +let string_to_caml_string str = + let str = String.escaped str in + let l = String.length str in + let res = Bytes.make (l + 2) '\"' in + String.unsafe_blit str 0 res 1 l; + Bytes.unsafe_to_string res + +(* Generate the format_int/int32/nativeint/int64 first argument + from an int_conv. *) +let format_of_iconv = function + | Int_d -> "%d" | Int_pd -> "%+d" | Int_sd -> "% d" + | Int_i -> "%i" | Int_pi -> "%+i" | Int_si -> "% i" + | Int_x -> "%x" | Int_Cx -> "%#x" + | Int_X -> "%X" | Int_CX -> "%#X" + | Int_o -> "%o" | Int_Co -> "%#o" + | Int_u -> "%u" + +let format_of_iconvL = function + | Int_d -> "%Ld" | Int_pd -> "%+Ld" | Int_sd -> "% Ld" + | Int_i -> "%Li" | Int_pi -> "%+Li" | Int_si -> "% Li" + | Int_x -> "%Lx" | Int_Cx -> "%#Lx" + | Int_X -> "%LX" | Int_CX -> "%#LX" + | Int_o -> "%Lo" | Int_Co -> "%#Lo" + | Int_u -> "%Lu" + +let format_of_iconvl = function + | Int_d -> "%ld" | Int_pd -> "%+ld" | Int_sd -> "% ld" + | Int_i -> "%li" | Int_pi -> "%+li" | Int_si -> "% li" + | Int_x -> "%lx" | Int_Cx -> "%#lx" + | Int_X -> "%lX" | Int_CX -> "%#lX" + | Int_o -> "%lo" | Int_Co -> "%#lo" + | Int_u -> "%lu" + +let format_of_iconvn = function + | Int_d -> "%nd" | Int_pd -> "%+nd" | Int_sd -> "% nd" + | Int_i -> "%ni" | Int_pi -> "%+ni" | Int_si -> "% ni" + | Int_x -> "%nx" | Int_Cx -> "%#nx" + | Int_X -> "%nX" | Int_CX -> "%#nX" + | Int_o -> "%no" | Int_Co -> "%#no" + | Int_u -> "%nu" + +(* Generate the format_float first argument form a float_conv. *) +let format_of_fconv fconv prec = + let symb = if fconv = Float_F then 'g' else char_of_fconv fconv in + let buf = buffer_create 16 in + buffer_add_char buf '%'; + bprint_fconv_flag buf fconv; + buffer_add_char buf '.'; + buffer_add_string buf (string_of_int prec); + buffer_add_char buf symb; + buffer_contents buf + +(* Convert an integer to a string according to a conversion. *) +let convert_int iconv n = format_int (format_of_iconv iconv) n +let convert_int32 iconv n = format_int32 (format_of_iconvl iconv) n +let convert_nativeint iconv n = format_nativeint (format_of_iconvn iconv) n +let convert_int64 iconv n = format_int64 (format_of_iconvL iconv) n + +(* Convert a float to string. *) +(* Fix special case of "OCaml float format". *) +let convert_float fconv prec x = + let str = format_float (format_of_fconv fconv prec) x in + if fconv <> Float_F then str else + let len = String.length str in + let rec is_valid i = + if i = len then false else + match str.[i] with + | '.' | 'e' | 'E' -> true + | _ -> is_valid (i + 1) + in + match classify_float x with + | FP_normal | FP_subnormal | FP_zero -> + if is_valid 0 then str else str ^ "." + | FP_infinite -> + if x < 0.0 then "neg_infinity" else "infinity" + | FP_nan -> "nan" + +(* Convert a char to a string according to the OCaml lexical convention. *) +let format_caml_char c = + let str = Char.escaped c in + let l = String.length str in + let res = Bytes.make (l + 2) '\'' in + String.unsafe_blit str 0 res 1 l; + Bytes.unsafe_to_string res + +(* Convert a format type to string *) +let string_of_fmtty fmtty = + let buf = buffer_create 16 in + bprint_fmtty buf fmtty; + buffer_contents buf + +(******************************************************************************) + (* Generic printing function *) + +(* Make a generic printing function. *) +(* Used to generate Printf and Format printing functions. *) +(* Parameters: + k: a continuation finally applied to the output stream and the accumulator. + o: the output stream (see k, %a and %t). + acc: rev list of printing entities (string, char, flush, formatting, ...). + fmt: the format. *) +let rec make_printf : type a b c d e f . + (b -> (b, c) acc -> f) -> b -> (b, c) acc -> + (a, b, c, d, e, f) fmt -> a = +fun k o acc fmt -> match fmt with + | Char rest -> + fun c -> + let new_acc = Acc_data_char (acc, c) in + make_printf k o new_acc rest + | Caml_char rest -> + fun c -> + let new_acc = Acc_data_string (acc, format_caml_char c) in + make_printf k o new_acc rest + | String (pad, rest) -> + make_string_padding k o acc rest pad (fun str -> str) + | Caml_string (pad, rest) -> + make_string_padding k o acc rest pad string_to_caml_string + | Int (iconv, pad, prec, rest) -> + make_int_padding_precision k o acc rest pad prec convert_int iconv + | Int32 (iconv, pad, prec, rest) -> + make_int_padding_precision k o acc rest pad prec convert_int32 iconv + | Nativeint (iconv, pad, prec, rest) -> + make_int_padding_precision k o acc rest pad prec convert_nativeint iconv + | Int64 (iconv, pad, prec, rest) -> + make_int_padding_precision k o acc rest pad prec convert_int64 iconv + | Float (fconv, pad, prec, rest) -> + make_float_padding_precision k o acc rest pad prec fconv + | Bool rest -> + fun b -> make_printf k o (Acc_data_string (acc, string_of_bool b)) rest + | Alpha rest -> + fun f x -> make_printf k o (Acc_delay (acc, fun o -> f o x)) rest + | Theta rest -> + fun f -> make_printf k o (Acc_delay (acc, f)) rest + | Reader _ -> + (* This case is impossible, by typing of formats. *) + (* Indeed, since printf and co. take a format4 as argument, the 'd and 'e + type parameters of fmt are obviously equals. The Reader is the + only constructor which touch 'd and 'e type parameters of the format + type, it adds an (->) to the 'd parameters. Consequently, a format4 + cannot contain a Reader node, except in the sub-format associated to + an %{...%}. It's not a problem because make_printf do not call + itself recursively on the sub-format associated to %{...%}. *) + assert false + | Flush rest -> + make_printf k o (Acc_flush acc) rest + + | String_literal (str, rest) -> + make_printf k o (Acc_string_literal (acc, str)) rest + | Char_literal (chr, rest) -> + make_printf k o (Acc_char_literal (acc, chr)) rest + + | Format_arg (_, sub_fmtty, rest) -> + let ty = string_of_fmtty sub_fmtty in + (fun str -> + ignore str; + make_printf k o (Acc_data_string (acc, ty)) rest) + | Format_subst (_, fmtty, rest) -> + fun (Format (fmt, _)) -> make_printf k o acc + (concat_fmt (recast fmt fmtty) rest) + + | Scan_char_set (_, _, rest) -> + let new_acc = Acc_invalid_arg (acc, "Printf: bad conversion %[") in + fun _ -> make_printf k o new_acc rest + | Scan_get_counter (_, rest) -> + (* This case should be refused for Printf. *) + (* Accepted for backward compatibility. *) + (* Interpret %l, %n and %L as %u. *) + fun n -> + let new_acc = Acc_data_string (acc, format_int "%u" n) in + make_printf k o new_acc rest + | Ignored_param (ign, rest) -> + make_ignored_param k o acc ign rest + + | Formatting_lit (fmting_lit, rest) -> + make_printf k o (Acc_formatting_lit (acc, fmting_lit)) rest + | Formatting_gen (Open_tag (Format (fmt', _)), rest) -> + let k' koc kacc = + make_printf k koc (Acc_formatting_gen (acc, Acc_open_tag kacc)) rest in + make_printf k' o End_of_acc fmt' + | Formatting_gen (Open_box (Format (fmt', _)), rest) -> + let k' koc kacc = + make_printf k koc (Acc_formatting_gen (acc, Acc_open_box kacc)) rest in + make_printf k' o End_of_acc fmt' + + | End_of_format -> + k o acc + +(* Delay the error (Invalid_argument "Printf: bad conversion %_"). *) +(* Generate functions to take remaining arguments (after the "%_"). *) +and make_ignored_param : type x y a b c d e f . + (b -> (b, c) acc -> f) -> b -> (b, c) acc -> + (a, b, c, d, y, x) ignored -> + (x, b, c, y, e, f) fmt -> a = +fun k o acc ign fmt -> match ign with + | Ignored_char -> make_invalid_arg k o acc fmt + | Ignored_caml_char -> make_invalid_arg k o acc fmt + | Ignored_string _ -> make_invalid_arg k o acc fmt + | Ignored_caml_string _ -> make_invalid_arg k o acc fmt + | Ignored_int (_, _) -> make_invalid_arg k o acc fmt + | Ignored_int32 (_, _) -> make_invalid_arg k o acc fmt + | Ignored_nativeint (_, _) -> make_invalid_arg k o acc fmt + | Ignored_int64 (_, _) -> make_invalid_arg k o acc fmt + | Ignored_float (_, _) -> make_invalid_arg k o acc fmt + | Ignored_bool -> make_invalid_arg k o acc fmt + | Ignored_format_arg _ -> make_invalid_arg k o acc fmt + | Ignored_format_subst (_, fmtty) -> make_from_fmtty k o acc fmtty fmt + | Ignored_reader -> assert false + | Ignored_scan_char_set _ -> make_invalid_arg k o acc fmt + | Ignored_scan_get_counter _ -> make_invalid_arg k o acc fmt + + +(* Special case of printf "%_(". *) +and make_from_fmtty : type x y a b c d e f . + (b -> (b, c) acc -> f) -> b -> (b, c) acc -> + (a, b, c, d, y, x) fmtty -> + (x, b, c, y, e, f) fmt -> a = +fun k o acc fmtty fmt -> match fmtty with + | Char_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | String_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Int_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Int32_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Nativeint_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Int64_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Float_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Bool_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Alpha_ty rest -> fun _ _ -> make_from_fmtty k o acc rest fmt + | Theta_ty rest -> fun _ -> make_from_fmtty k o acc rest fmt + | Reader_ty _ -> assert false + | Ignored_reader_ty _ -> assert false + | Format_arg_ty (_, rest) -> fun _ -> make_from_fmtty k o acc rest fmt + | End_of_fmtty -> make_invalid_arg k o acc fmt + | Format_subst_ty (ty1, ty2, rest) -> + let ty = trans (symm ty1) ty2 in + fun _ -> make_from_fmtty k o acc (concat_fmtty ty rest) fmt + +(* Insert an Acc_invalid_arg in the accumulator and continue to generate + closures to get the remaining arguments. *) +and make_invalid_arg : type a b c d e f . + (b -> (b, c) acc -> f) -> b -> (b, c) acc -> + (a, b, c, d, e, f) fmt -> a = +fun k o acc fmt -> + make_printf k o (Acc_invalid_arg (acc, "Printf: bad conversion %_")) fmt + +(* Fix padding, take it as an extra integer argument if needed. *) +and make_string_padding : type x z a b c d e f . + (b -> (b, c) acc -> f) -> b -> (b, c) acc -> + (a, b, c, d, e, f) fmt -> + (x, z -> a) padding -> (z -> string) -> x = + fun k o acc fmt pad trans -> match pad with + | No_padding -> + fun x -> + let new_acc = Acc_data_string (acc, trans x) in + make_printf k o new_acc fmt + | Lit_padding (padty, width) -> + fun x -> + let new_acc = Acc_data_string (acc, fix_padding padty width (trans x)) in + make_printf k o new_acc fmt + | Arg_padding padty -> + fun w x -> + let new_acc = Acc_data_string (acc, fix_padding padty w (trans x)) in + make_printf k o new_acc fmt + +(* Fix padding and precision for int, int32, nativeint or int64. *) +(* Take one or two extra integer arguments if needed. *) +and make_int_padding_precision : type x y z a b c d e f . + (b -> (b, c) acc -> f) -> b -> (b, c) acc -> + (a, b, c, d, e, f) fmt -> + (x, y) padding -> (y, z -> a) precision -> (int_conv -> z -> string) -> + int_conv -> x = + fun k o acc fmt pad prec trans iconv -> match pad, prec with + | No_padding, No_precision -> + fun x -> + let str = trans iconv x in + make_printf k o (Acc_data_string (acc, str)) fmt + | No_padding, Lit_precision p -> + fun x -> + let str = fix_int_precision p (trans iconv x) in + make_printf k o (Acc_data_string (acc, str)) fmt + | No_padding, Arg_precision -> + fun p x -> + let str = fix_int_precision p (trans iconv x) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Lit_padding (padty, w), No_precision -> + fun x -> + let str = fix_padding padty w (trans iconv x) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Lit_padding (padty, w), Lit_precision p -> + fun x -> + let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Lit_padding (padty, w), Arg_precision -> + fun p x -> + let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Arg_padding padty, No_precision -> + fun w x -> + let str = fix_padding padty w (trans iconv x) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Arg_padding padty, Lit_precision p -> + fun w x -> + let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Arg_padding padty, Arg_precision -> + fun w p x -> + let str = fix_padding padty w (fix_int_precision p (trans iconv x)) in + make_printf k o (Acc_data_string (acc, str)) fmt + +(* Convert a float, fix padding and precision if needed. *) +(* Take the float argument and one or two extra integer arguments if needed. *) +and make_float_padding_precision : type x y a b c d e f . + (b -> (b, c) acc -> f) -> b -> (b, c) acc -> + (a, b, c, d, e, f) fmt -> + (x, y) padding -> (y, float -> a) precision -> float_conv -> x = + fun k o acc fmt pad prec fconv -> match pad, prec with + | No_padding, No_precision -> + fun x -> + let str = convert_float fconv default_float_precision x in + make_printf k o (Acc_data_string (acc, str)) fmt + | No_padding, Lit_precision p -> + fun x -> + let str = convert_float fconv p x in + make_printf k o (Acc_data_string (acc, str)) fmt + | No_padding, Arg_precision -> + fun p x -> + let str = convert_float fconv p x in + make_printf k o (Acc_data_string (acc, str)) fmt + | Lit_padding (padty, w), No_precision -> + fun x -> + let str = convert_float fconv default_float_precision x in + let str' = fix_padding padty w str in + make_printf k o (Acc_data_string (acc, str')) fmt + | Lit_padding (padty, w), Lit_precision p -> + fun x -> + let str = fix_padding padty w (convert_float fconv p x) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Lit_padding (padty, w), Arg_precision -> + fun p x -> + let str = fix_padding padty w (convert_float fconv p x) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Arg_padding padty, No_precision -> + fun w x -> + let str = convert_float fconv default_float_precision x in + let str' = fix_padding padty w str in + make_printf k o (Acc_data_string (acc, str')) fmt + | Arg_padding padty, Lit_precision p -> + fun w x -> + let str = fix_padding padty w (convert_float fconv p x) in + make_printf k o (Acc_data_string (acc, str)) fmt + | Arg_padding padty, Arg_precision -> + fun w p x -> + let str = fix_padding padty w (convert_float fconv p x) in + make_printf k o (Acc_data_string (acc, str)) fmt + +(******************************************************************************) + (* Continuations for make_printf *) + +(* Recursively output an "accumulator" containing a reversed list of + printing entities (string, char, flus, ...) in an output_stream. *) +(* Used as a continuation of make_printf. *) +let rec output_acc o acc = match acc with + | Acc_formatting_lit (p, fmting_lit) -> + let s = string_of_formatting_lit fmting_lit in + output_acc o p; output_string o s; + | Acc_formatting_gen (p, Acc_open_tag acc') -> + output_acc o p; output_string o "@{"; output_acc o acc'; + | Acc_formatting_gen (p, Acc_open_box acc') -> + output_acc o p; output_string o "@["; output_acc o acc'; + | Acc_string_literal (p, s) + | Acc_data_string (p, s) -> output_acc o p; output_string o s + | Acc_char_literal (p, c) + | Acc_data_char (p, c) -> output_acc o p; output_char o c + | Acc_delay (p, f) -> output_acc o p; f o + | Acc_flush p -> output_acc o p; flush o + | Acc_invalid_arg (p, msg) -> output_acc o p; invalid_arg msg; + | End_of_acc -> () + +(* Recursively output an "accumulator" containing a reversed list of + printing entities (string, char, flus, ...) in a buffer. *) +(* Used as a continuation of make_printf. *) +let rec bufput_acc b acc = match acc with + | Acc_formatting_lit (p, fmting_lit) -> + let s = string_of_formatting_lit fmting_lit in + bufput_acc b p; Buffer.add_string b s; + | Acc_formatting_gen (p, Acc_open_tag acc') -> + bufput_acc b p; Buffer.add_string b "@{"; bufput_acc b acc'; + | Acc_formatting_gen (p, Acc_open_box acc') -> + bufput_acc b p; Buffer.add_string b "@["; bufput_acc b acc'; + | Acc_string_literal (p, s) + | Acc_data_string (p, s) -> bufput_acc b p; Buffer.add_string b s + | Acc_char_literal (p, c) + | Acc_data_char (p, c) -> bufput_acc b p; Buffer.add_char b c + | Acc_delay (p, f) -> bufput_acc b p; f b + | Acc_flush p -> bufput_acc b p; + | Acc_invalid_arg (p, msg) -> bufput_acc b p; invalid_arg msg; + | End_of_acc -> () + +(* Recursively output an "accumulator" containing a reversed list of + printing entities (string, char, flus, ...) in a buffer. *) +(* Differ from bufput_acc by the interpretation of %a and %t. *) +(* Used as a continuation of make_printf. *) +let rec strput_acc b acc = match acc with + | Acc_formatting_lit (p, fmting_lit) -> + let s = string_of_formatting_lit fmting_lit in + strput_acc b p; Buffer.add_string b s; + | Acc_formatting_gen (p, Acc_open_tag acc') -> + strput_acc b p; Buffer.add_string b "@{"; strput_acc b acc'; + | Acc_formatting_gen (p, Acc_open_box acc') -> + strput_acc b p; Buffer.add_string b "@["; strput_acc b acc'; + | Acc_string_literal (p, s) + | Acc_data_string (p, s) -> strput_acc b p; Buffer.add_string b s + | Acc_char_literal (p, c) + | Acc_data_char (p, c) -> strput_acc b p; Buffer.add_char b c + | Acc_delay (p, f) -> strput_acc b p; Buffer.add_string b (f ()) + | Acc_flush p -> strput_acc b p; + | Acc_invalid_arg (p, msg) -> strput_acc b p; invalid_arg msg; + | End_of_acc -> () + +(******************************************************************************) + (* Error managment *) + +(* Raise a Failure with a pretty-printed error message. *) +let failwith_message (Format (fmt, _)) = + let buf = Buffer.create 256 in + let k () acc = strput_acc buf acc; failwith (Buffer.contents buf) in + make_printf k () End_of_acc fmt + +(******************************************************************************) + (* Formatting tools *) + +(* Convert a string to an open block description (indent, block_type) *) +let open_box_of_string str = + if str = "" then (0, Pp_box) else + let len = String.length str in + let invalid_box () = failwith_message "invalid box description %S" str in + let rec parse_spaces i = + if i = len then i else + match str.[i] with + | ' ' | '\t' -> parse_spaces (i + 1) + | _ -> i + and parse_lword i j = + if j = len then j else + match str.[j] with + | 'a' .. 'z' -> parse_lword i (j + 1) + | _ -> j + and parse_int i j = + if j = len then j else + match str.[j] with + | '0' .. '9' | '-' -> parse_int i (j + 1) + | _ -> j in + let wstart = parse_spaces 0 in + let wend = parse_lword wstart wstart in + let box_name = String.sub str wstart (wend - wstart) in + let nstart = parse_spaces wend in + let nend = parse_int nstart nstart in + let indent = + if nstart = nend then 0 else + try int_of_string (String.sub str nstart (nend - nstart)) + with Failure _ -> invalid_box () in + let exp_end = parse_spaces nend in + let () = if exp_end <> len then invalid_box () in + let box_type = match box_name with + | "" | "b" -> Pp_box + | "h" -> Pp_hbox + | "v" -> Pp_vbox + | "hv" -> Pp_hvbox + | "hov" -> Pp_hovbox + | _ -> invalid_box () in + (indent, box_type) + +(******************************************************************************) + (* Parsing tools *) + +(* Create a padding_fmt_ebb from a padding and a format. *) +(* Copy the padding to disjoin the type parameters of argument and result. *) +let make_padding_fmt_ebb : type x y . + (x, y) padding -> (_, _, _, _, _, _) fmt -> + (_, _, _, _, _) padding_fmt_ebb = +fun pad fmt -> match pad with + | No_padding -> Padding_fmt_EBB (No_padding, fmt) + | Lit_padding (s, w) -> Padding_fmt_EBB (Lit_padding (s, w), fmt) + | Arg_padding s -> Padding_fmt_EBB (Arg_padding s, fmt) + +(* Create a precision_fmt_ebb from a precision and a format. *) +(* Copy the precision to disjoin the type parameters of argument and result. *) +let make_precision_fmt_ebb : type x y . + (x, y) precision -> (_, _, _, _, _, _) fmt -> + (_, _, _, _, _) precision_fmt_ebb = +fun prec fmt -> match prec with + | No_precision -> Precision_fmt_EBB (No_precision, fmt) + | Lit_precision p -> Precision_fmt_EBB (Lit_precision p, fmt) + | Arg_precision -> Precision_fmt_EBB (Arg_precision, fmt) + +(* Create a padprec_fmt_ebb forma a padding, a precision and a format. *) +(* Copy the padding and the precision to disjoin type parameters of arguments + and result. *) +let make_padprec_fmt_ebb : type x y z t . + (x, y) padding -> (z, t) precision -> + (_, _, _, _, _, _) fmt -> + (_, _, _, _, _) padprec_fmt_ebb = +fun pad prec fmt -> + let Precision_fmt_EBB (prec, fmt') = make_precision_fmt_ebb prec fmt in + match pad with + | No_padding -> Padprec_fmt_EBB (No_padding, prec, fmt') + | Lit_padding (s, w) -> Padprec_fmt_EBB (Lit_padding (s, w), prec, fmt') + | Arg_padding s -> Padprec_fmt_EBB (Arg_padding s, prec, fmt') + +(******************************************************************************) + (* Format parsing *) + +(* Parse a string representing a format and create a fmt_ebb. *) +(* Raise an Failure exception in case of invalid format. *) +let fmt_ebb_of_string ?legacy_behavior str = + (* Parameters naming convention: *) + (* - lit_start: start of the literal sequence. *) + (* - str_ind: current index in the string. *) + (* - end_ind: end of the current (sub-)format. *) + (* - pct_ind: index of the '%' in the current micro-format. *) + (* - zero: is the '0' flag defined in the current micro-format. *) + (* - minus: is the '-' flag defined in the current micro-format. *) + (* - plus: is the '+' flag defined in the current micro-format. *) + (* - sharp: is the '#' flag defined in the current micro-format. *) + (* - space: is the ' ' flag defined in the current micro-format. *) + (* - ign: is the '_' flag defined in the current micro-format. *) + (* - pad: padding of the current micro-format. *) + (* - prec: precision of the current micro-format. *) + (* - symb: char representing the conversion ('c', 's', 'd', ...). *) + (* - char_set: set of characters as bitmap (see scanf %[...]). *) + + let legacy_behavior = match legacy_behavior with + | Some flag -> flag + | None -> true + (** When this flag is enabled, the format parser tries to behave as + the <4.02 implementations, in particular it ignores most benine + nonsensical format. When the flag is disabled, it will reject any + format that is not accepted by the specification. + + A typical example would be "%+ d": specifying both '+' (if the + number is positive, pad with a '+' to get the same width as + negative numbres) and ' ' (if the number is positive, pad with + a space) does not make sense, but the legacy (< 4.02) + implementation was happy to just ignore the space. + *) + in + + (* Raise a Failure with a friendly error message. *) + (* Used when the end of the format (or the current sub-format) was encoutered + unexpectedly. *) + let unexpected_end_of_format end_ind = + failwith_message + "invalid format %S: at character number %d, unexpected end of format" + str end_ind; + + (* Raise Failure with a friendly error message about an option dependencie + problem. *) + and invalid_format_without str_ind c s = + failwith_message + "invalid format %S: at character number %d, '%c' without %s" + str str_ind c s + + (* Raise Failure with a friendly error message about an unexpected + character. *) + and expected_character str_ind expected read = + failwith_message + "invalid format %S: at character number %d, %s expected, read %C" + str str_ind expected read in + + (* Parse the string from beg_ind (included) to end_ind (excluded). *) + let rec parse : type e f . int -> int -> (_, _, e, f) fmt_ebb = + fun beg_ind end_ind -> parse_literal beg_ind beg_ind end_ind + + (* Read literal characters up to '%' or '@' special characters. *) + and parse_literal : type e f . int -> int -> int -> (_, _, e, f) fmt_ebb = + fun lit_start str_ind end_ind -> + if str_ind = end_ind then add_literal lit_start str_ind End_of_format else + match str.[str_ind] with + | '%' -> + let Fmt_EBB fmt_rest = parse_format str_ind end_ind in + add_literal lit_start str_ind fmt_rest + | '@' -> + let Fmt_EBB fmt_rest = parse_after_at (str_ind + 1) end_ind in + add_literal lit_start str_ind fmt_rest + | _ -> + parse_literal lit_start (str_ind + 1) end_ind + + (* Parse a format after '%' *) + and parse_format : type e f . int -> int -> (_, _, e, f) fmt_ebb = + fun pct_ind end_ind -> parse_ign pct_ind (pct_ind + 1) end_ind + + and parse_ign : type e f . int -> int -> int -> (_, _, e, f) fmt_ebb = + fun pct_ind str_ind end_ind -> + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | '_' -> parse_flags pct_ind (str_ind+1) end_ind true + | _ -> parse_flags pct_ind str_ind end_ind false + + and parse_flags : type e f . int -> int -> int -> bool -> (_, _, e, f) fmt_ebb = + fun pct_ind str_ind end_ind ign -> + let zero = ref false and minus = ref false + and plus = ref false and space = ref false + and sharp = ref false in + let set_flag str_ind flag = + (* in legacy mode, duplicate flags are accepted *) + if !flag && not legacy_behavior then + failwith_message + "invalid format %S: at character number %d, duplicate flag %C" + str str_ind str.[str_ind]; + flag := true; + in + let rec read_flags str_ind = + if str_ind = end_ind then unexpected_end_of_format end_ind; + begin match str.[str_ind] with + | '0' -> set_flag str_ind zero; read_flags (str_ind + 1) + | '-' -> set_flag str_ind minus; read_flags (str_ind + 1) + | '+' -> set_flag str_ind plus; read_flags (str_ind + 1) + | '#' -> set_flag str_ind sharp; read_flags (str_ind + 1) + | ' ' -> set_flag str_ind space; read_flags (str_ind + 1) + | _ -> + parse_padding pct_ind str_ind end_ind + !zero !minus !plus !sharp !space ign + end + in + read_flags str_ind + + (* Try to read a digital or a '*' padding. *) + and parse_padding : type e f . + int -> int -> int -> bool -> bool -> bool -> bool -> bool -> bool -> + (_, _, e, f) fmt_ebb = + fun pct_ind str_ind end_ind zero minus plus sharp space ign -> + if str_ind = end_ind then unexpected_end_of_format end_ind; + let padty = match zero, minus with + | false, false -> Right + | false, true -> Left + | true, false -> Zeros + | true, true -> + if legacy_behavior then Left + else incompatible_flag pct_ind str_ind '-' "0" in + match str.[str_ind] with + | '0' .. '9' -> + let new_ind, width = parse_positive str_ind end_ind 0 in + parse_after_padding pct_ind new_ind end_ind plus sharp space ign + (Lit_padding (padty, width)) + | '*' -> + parse_after_padding pct_ind (str_ind + 1) end_ind plus sharp space ign + (Arg_padding padty) + | _ -> + if legacy_behavior then + parse_after_padding pct_ind str_ind end_ind plus sharp space ign + No_padding + else begin match padty with + | Left -> + invalid_format_without (str_ind - 1) '-' "padding" + | Zeros -> + invalid_format_without (str_ind - 1) '0' "padding" + | Right -> + parse_after_padding pct_ind str_ind end_ind plus sharp space ign + No_padding + end + + (* Is precision defined? *) + and parse_after_padding : type x e f . + int -> int -> int -> bool -> bool -> bool -> bool -> (x, _) padding -> + (_, _, e, f) fmt_ebb = + fun pct_ind str_ind end_ind plus sharp space ign pad -> + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | '.' -> + parse_precision pct_ind (str_ind + 1) end_ind plus sharp space ign pad + | symb -> + parse_conversion pct_ind (str_ind + 1) end_ind plus sharp space ign pad + No_precision symb + + (* Read the digital or '*' precision. *) + and parse_precision : type x e f . + int -> int -> int -> bool -> bool -> bool -> bool -> (x, _) padding -> + (_, _, e, f) fmt_ebb = + fun pct_ind str_ind end_ind plus sharp space ign pad -> + if str_ind = end_ind then unexpected_end_of_format end_ind; + let parse_literal str_ind = + let new_ind, prec = parse_positive str_ind end_ind 0 in + if new_ind = end_ind then unexpected_end_of_format end_ind; + parse_conversion pct_ind (new_ind + 1) end_ind plus sharp space ign pad + (Lit_precision prec) str.[new_ind] in + match str.[str_ind] with + | '0' .. '9' -> parse_literal str_ind + | ('+' | '-') when legacy_behavior -> + (* Legacy mode would accept and ignore '+' or '-' before the + integer describing the desired precision; not that this + cannot happen for padding width, as '+' and '-' already have + a semantics there. + + That said, the idea (supported by this tweak) that width and + precision literals are "integer literals" in the OCaml sense is + still blatantly wrong, as 123_456 or 0xFF are rejected. *) + parse_literal (str_ind + 1) + | '*' -> + parse_after_precision pct_ind (str_ind + 1) end_ind plus sharp space ign + pad Arg_precision + | _ -> + if legacy_behavior then + (* note that legacy implementation did not ignore '.' without + a number (as it does for padding indications), but + interprets it as '.0' *) + parse_after_precision pct_ind str_ind end_ind plus sharp space ign pad (Lit_precision 0) else + invalid_format_without (str_ind - 1) '.' "precision" + + (* Try to read the conversion. *) + and parse_after_precision : type x z e f . + int -> int -> int -> bool -> bool -> bool -> bool -> (x, _) padding -> + (z, _) precision -> (_, _, e, f) fmt_ebb = + fun pct_ind str_ind end_ind plus sharp space ign pad prec -> + if str_ind = end_ind then unexpected_end_of_format end_ind; + parse_conversion pct_ind (str_ind + 1) end_ind plus sharp space ign pad prec + str.[str_ind] + + (* Case analysis on conversion. *) + and parse_conversion : type x y z t e f . + int -> int -> int -> bool -> bool -> bool -> bool -> (x, y) padding -> + (z, t) precision -> char -> (_, _, e, f) fmt_ebb = + fun pct_ind str_ind end_ind plus sharp space ign pad prec symb -> + (* Flags used to check option usages/compatibilities. *) + let plus_used = ref false and sharp_used = ref false + and space_used = ref false and ign_used = ref false + and pad_used = ref false and prec_used = ref false in + + (* Access to options, update flags. *) + let get_plus () = plus_used := true; plus + and get_sharp () = sharp_used := true; sharp + and get_space () = space_used := true; space + and get_ign () = ign_used := true; ign + and get_pad () = pad_used := true; pad + and get_prec () = prec_used := true; prec in + + (* Check that padty <> Zeros. *) + let check_no_0 symb (type a) (type b) (pad : (a,b) padding) = + match pad with + | No_padding -> pad + | Lit_padding ((Left | Right), _) -> pad + | Arg_padding (Left | Right) -> pad + | Lit_padding (Zeros, width) -> + if legacy_behavior then Lit_padding (Right, width) + else incompatible_flag pct_ind str_ind symb "0" + | Arg_padding Zeros -> + if legacy_behavior then Arg_padding Right + else incompatible_flag pct_ind str_ind symb "0" + in + + (* Get padding as a pad_option (see "%_", "%{", "%(" and "%["). + (no need for legacy mode tweaking, those were rejected by the + legacy parser as well) *) + let get_pad_opt c = match get_pad () with + | No_padding -> None + | Lit_padding (Right, width) -> Some width + | Lit_padding (Zeros, width) -> + if legacy_behavior then Some width + else incompatible_flag pct_ind str_ind c "'0'" + | Lit_padding (Left, width) -> + if legacy_behavior then Some width + else incompatible_flag pct_ind str_ind c "'-'" + | Arg_padding _ -> incompatible_flag pct_ind str_ind c "'*'" + in + + (* Get precision as a prec_option (see "%_f"). + (no need for legacy mode tweaking, those were rejected by the + legacy parser as well) *) + let get_prec_opt () = match get_prec () with + | No_precision -> None + | Lit_precision ndec -> Some ndec + | Arg_precision -> incompatible_flag pct_ind str_ind '_' "'*'" + in + + let fmt_result = match symb with + | ',' -> + parse str_ind end_ind + | 'c' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then Fmt_EBB (Ignored_param (Ignored_char, fmt_rest)) + else Fmt_EBB (Char fmt_rest) + | 'C' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then Fmt_EBB (Ignored_param (Ignored_caml_char,fmt_rest)) + else Fmt_EBB (Caml_char fmt_rest) + | 's' -> + let pad = check_no_0 symb (get_pad ()) in + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then + let ignored = Ignored_string (get_pad_opt '_') in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + let Padding_fmt_EBB (pad', fmt_rest') = + make_padding_fmt_ebb pad fmt_rest in + Fmt_EBB (String (pad', fmt_rest')) + | 'S' -> + let pad = check_no_0 symb (get_pad ()) in + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then + let ignored = Ignored_caml_string (get_pad_opt '_') in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + let Padding_fmt_EBB (pad', fmt_rest') = + make_padding_fmt_ebb pad fmt_rest in + Fmt_EBB (Caml_string (pad', fmt_rest')) + | 'd' | 'i' | 'x' | 'X' | 'o' | 'u' -> + let iconv = compute_int_conv pct_ind str_ind (get_plus ()) (get_sharp ()) + (get_space ()) symb in + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then + let ignored = Ignored_int (iconv, get_pad_opt '_') in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + let Padprec_fmt_EBB (pad', prec', fmt_rest') = + make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in + Fmt_EBB (Int (iconv, pad', prec', fmt_rest')) + | 'N' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + let counter = Token_counter in + if get_ign () then + let ignored = Ignored_scan_get_counter counter in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + Fmt_EBB (Scan_get_counter (counter, fmt_rest)) + | 'l' | 'n' | 'L' when str_ind=end_ind || not (is_int_base str.[str_ind]) -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + let counter = counter_of_char symb in + if get_ign () then + let ignored = Ignored_scan_get_counter counter in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + Fmt_EBB (Scan_get_counter (counter, fmt_rest)) + | 'l' -> + let iconv = + compute_int_conv pct_ind (str_ind + 1) (get_plus ()) (get_sharp ()) + (get_space ()) str.[str_ind] in + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + if get_ign () then + let ignored = Ignored_int32 (iconv, get_pad_opt '_') in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + let Padprec_fmt_EBB (pad', prec', fmt_rest') = + make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in + Fmt_EBB (Int32 (iconv, pad', prec', fmt_rest')) + | 'n' -> + let iconv = + compute_int_conv pct_ind (str_ind + 1) (get_plus ()) + (get_sharp ()) (get_space ()) str.[str_ind] in + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + if get_ign () then + let ignored = Ignored_nativeint (iconv, get_pad_opt '_') in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + let Padprec_fmt_EBB (pad', prec', fmt_rest') = + make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in + Fmt_EBB (Nativeint (iconv, pad', prec', fmt_rest')) + | 'L' -> + let iconv = + compute_int_conv pct_ind (str_ind + 1) (get_plus ()) (get_sharp ()) + (get_space ()) str.[str_ind] in + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + if get_ign () then + let ignored = Ignored_int64 (iconv, get_pad_opt '_') in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + let Padprec_fmt_EBB (pad', prec', fmt_rest') = + make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in + Fmt_EBB (Int64 (iconv, pad', prec', fmt_rest')) + | 'f' | 'e' | 'E' | 'g' | 'G' | 'F' -> + let fconv = compute_float_conv pct_ind str_ind (get_plus ()) + (get_space ()) symb in + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then + let ignored = Ignored_float (get_pad_opt '_', get_prec_opt ()) in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + let Padprec_fmt_EBB (pad', prec', fmt_rest') = + make_padprec_fmt_ebb (get_pad ()) (get_prec ()) fmt_rest in + Fmt_EBB (Float (fconv, pad', prec', fmt_rest')) + | 'b' | 'B' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then Fmt_EBB (Ignored_param (Ignored_bool, fmt_rest)) + else Fmt_EBB (Bool fmt_rest) + | 'a' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + Fmt_EBB (Alpha fmt_rest) + | 't' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + Fmt_EBB (Theta fmt_rest) + | 'r' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + if get_ign () then Fmt_EBB (Ignored_param (Ignored_reader, fmt_rest)) + else Fmt_EBB (Reader fmt_rest) + | '!' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + Fmt_EBB (Flush fmt_rest) + | ('%' | '@') as c -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + Fmt_EBB (Char_literal (c, fmt_rest)) + | '{' -> + let sub_end = search_subformat_end str_ind end_ind '}' in + let Fmt_EBB sub_fmt = parse str_ind sub_end in + let Fmt_EBB fmt_rest = parse (sub_end + 2) end_ind in + let sub_fmtty = fmtty_of_fmt sub_fmt in + if get_ign () then + let ignored = Ignored_format_arg (get_pad_opt '_', sub_fmtty) in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + Fmt_EBB (Format_arg (get_pad_opt '{', sub_fmtty, fmt_rest)) + | '(' -> + let sub_end = search_subformat_end str_ind end_ind ')' in + let Fmt_EBB fmt_rest = parse (sub_end + 2) end_ind in + let Fmt_EBB sub_fmt = parse str_ind sub_end in + let sub_fmtty = fmtty_of_fmt sub_fmt in + if get_ign () then + let ignored = Ignored_format_subst (get_pad_opt '_', sub_fmtty) in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + Fmt_EBB (Format_subst (get_pad_opt '(', sub_fmtty, fmt_rest)) + | '[' -> + let next_ind, char_set = parse_char_set str_ind end_ind in + let Fmt_EBB fmt_rest = parse next_ind end_ind in + if get_ign () then + let ignored = Ignored_scan_char_set (get_pad_opt '_', char_set) in + Fmt_EBB (Ignored_param (ignored, fmt_rest)) + else + Fmt_EBB (Scan_char_set (get_pad_opt '[', char_set, fmt_rest)) + | '-' | '+' | '#' | ' ' | '_' -> + failwith_message + "invalid format %S: at character number %d, \ + flag %C is only allowed after the '%%', before padding and precision" + str pct_ind symb + | _ -> + failwith_message + "invalid format %S: at character number %d, \ + invalid conversion \"%%%c\"" str (str_ind - 1) symb + in + (* Check for unused options, and reject them as incompatible. + + Such checks need to be disabled in legacy mode, as the legacy + parser silently ignored incompatible flags. *) + if not legacy_behavior then begin + if not !plus_used && plus then + incompatible_flag pct_ind str_ind symb "'+'"; + if not !sharp_used && sharp then + incompatible_flag pct_ind str_ind symb "'#'"; + if not !space_used && space then + incompatible_flag pct_ind str_ind symb "' '"; + if not !pad_used && Padding_EBB pad <> Padding_EBB No_padding then + incompatible_flag pct_ind str_ind symb "`padding'"; + if not !prec_used && Precision_EBB prec <> Precision_EBB No_precision then + incompatible_flag pct_ind str_ind (if ign then '_' else symb) + "`precision'"; + if ign && plus then incompatible_flag pct_ind str_ind '_' "'+'"; + end; + (* this last test must not be disabled in legacy mode, + as ignoring it would typically result in a different typing + than what the legacy parser used *) + if not !ign_used && ign then + begin match symb with + (* argument-less formats can safely be ignored in legacy mode *) + | ('@' | '%' | '!' | ',') when legacy_behavior -> () + | _ -> + incompatible_flag pct_ind str_ind symb "'_'" + end; + fmt_result + + (* Parse formatting informations (after '@'). *) + and parse_after_at : type e f . int -> int -> (_, _, e, f) fmt_ebb = + fun str_ind end_ind -> + if str_ind = end_ind then Fmt_EBB (Char_literal ('@', End_of_format)) + else + match str.[str_ind] with + | '[' -> + parse_tag false (str_ind + 1) end_ind + | ']' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Close_box, fmt_rest)) + | '{' -> + parse_tag true (str_ind + 1) end_ind + | '}' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Close_tag, fmt_rest)) + | ',' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Break ("@,", 0, 0), fmt_rest)) + | ' ' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Break ("@ ", 1, 0), fmt_rest)) + | ';' -> + parse_good_break (str_ind + 1) end_ind + | '?' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (FFlush, fmt_rest)) + | '\n' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Force_newline, fmt_rest)) + | '.' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Flush_newline, fmt_rest)) + | '<' -> + parse_magic_size (str_ind + 1) end_ind + | '@' -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Escaped_at, fmt_rest)) + | '%' when str_ind + 1 < end_ind && str.[str_ind + 1] = '%' -> + let Fmt_EBB fmt_rest = parse (str_ind + 2) end_ind in + Fmt_EBB (Formatting_lit (Escaped_percent, fmt_rest)) + | '%' -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + Fmt_EBB (Char_literal ('@', fmt_rest)) + | c -> + let Fmt_EBB fmt_rest = parse (str_ind + 1) end_ind in + Fmt_EBB (Formatting_lit (Scan_indic c, fmt_rest)) + + and check_open_box : type a b c d e f . (a, b, c, d, e, f) fmt -> unit = + fun fmt -> match fmt with + | String_literal (str, End_of_format) -> ( + try ignore (open_box_of_string str) with Failure _ -> + ((* Emit warning: invalid open box *)) + ) + | _ -> () + + (* Try to read the optionnal <name> after "@{" or "@[". *) + and parse_tag : type e f . bool -> int -> int -> (_, _, e, f) fmt_ebb = + fun is_open_tag str_ind end_ind -> + try + if str_ind = end_ind then raise Not_found; + match str.[str_ind] with + | '<' -> + let ind = String.index_from str (str_ind + 1) '>' in + if ind >= end_ind then raise Not_found; + let sub_str = String.sub str str_ind (ind - str_ind + 1) in + let Fmt_EBB fmt_rest = parse (ind + 1) end_ind in + let Fmt_EBB sub_fmt = parse str_ind (ind + 1) in + let sub_format = Format (sub_fmt, sub_str) in + let formatting = if is_open_tag then Open_tag sub_format else ( + check_open_box sub_fmt; + Open_box sub_format) in + Fmt_EBB (Formatting_gen (formatting, fmt_rest)) + | _ -> + raise Not_found + with Not_found -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + let sub_format = Format (End_of_format, "") in + let formatting = + if is_open_tag then Open_tag sub_format else Open_box sub_format in + Fmt_EBB (Formatting_gen (formatting, fmt_rest)) + + (* Try to read the optionnal <width offset> after "@;". *) + and parse_good_break : type e f . int -> int -> (_, _, e, f) fmt_ebb = + fun str_ind end_ind -> + let next_ind, formatting_lit = + try + if str_ind = end_ind || str.[str_ind] <> '<' then raise Not_found; + let str_ind_1 = parse_spaces (str_ind + 1) end_ind in + match str.[str_ind_1] with + | '0' .. '9' | '-' -> ( + let str_ind_2, width = parse_integer str_ind_1 end_ind in + let str_ind_3 = parse_spaces str_ind_2 end_ind in + match str.[str_ind_3] with + | '>' -> + let s = String.sub str (str_ind-2) (str_ind_3-str_ind+3) in + str_ind_3 + 1, Break (s, width, 0) + | '0' .. '9' | '-' -> + let str_ind_4, offset = parse_integer str_ind_3 end_ind in + let str_ind_5 = parse_spaces str_ind_4 end_ind in + if str.[str_ind_5] <> '>' then raise Not_found; + let s = String.sub str (str_ind-2) (str_ind_5-str_ind+3) in + str_ind_5 + 1, Break (s, width, offset) + | _ -> raise Not_found + ) + | _ -> raise Not_found + with Not_found | Failure _ -> + str_ind, Break ("@;", 1, 0) + in + let Fmt_EBB fmt_rest = parse next_ind end_ind in + Fmt_EBB (Formatting_lit (formatting_lit, fmt_rest)) + + (* Parse the size in a <n>. *) + and parse_magic_size : type e f . int -> int -> (_, _, e, f) fmt_ebb = + fun str_ind end_ind -> + match + try + let str_ind_1 = parse_spaces str_ind end_ind in + match str.[str_ind_1] with + | '0' .. '9' | '-' -> + let str_ind_2, size = parse_integer str_ind_1 end_ind in + let str_ind_3 = parse_spaces str_ind_2 end_ind in + if str.[str_ind_3] <> '>' then raise Not_found; + let s = String.sub str (str_ind - 2) (str_ind_3 - str_ind + 3) in + Some (str_ind_3 + 1, Magic_size (s, size)) + | _ -> None + with Not_found | Failure _ -> + None + with + | Some (next_ind, formatting_lit) -> + let Fmt_EBB fmt_rest = parse next_ind end_ind in + Fmt_EBB (Formatting_lit (formatting_lit, fmt_rest)) + | None -> + let Fmt_EBB fmt_rest = parse str_ind end_ind in + Fmt_EBB (Formatting_lit (Scan_indic '<', fmt_rest)) + + (* Parse and construct a char set. *) + and parse_char_set str_ind end_ind = + if str_ind = end_ind then unexpected_end_of_format end_ind; + + let char_set = create_char_set () in + let add_char c = + add_in_char_set char_set c; + in + let add_range c c' = + for i = int_of_char c to int_of_char c' do + add_in_char_set char_set (char_of_int i); + done; + in + + let fail_single_percent str_ind = + failwith_message + "invalid format %S: '%%' alone is not accepted in character sets, \ + use %%%% instead at position %d." str str_ind; + in + + (* Parse the first character of a char set. *) + let rec parse_char_set_start str_ind end_ind = + if str_ind = end_ind then unexpected_end_of_format end_ind; + let c = str.[str_ind] in + parse_char_set_after_char (str_ind + 1) end_ind c; + + (* Parse the content of a char set until the first ']'. *) + and parse_char_set_content str_ind end_ind = + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | ']' -> + str_ind + 1 + | '-' -> + add_char '-'; + parse_char_set_content (str_ind + 1) end_ind; + | c -> + parse_char_set_after_char (str_ind + 1) end_ind c; + + (* Test for range in char set. *) + and parse_char_set_after_char str_ind end_ind c = + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | ']' -> + add_char c; + str_ind + 1 + | '-' -> + parse_char_set_after_minus (str_ind + 1) end_ind c + | ('%' | '@') as c' when c = '%' -> + add_char c'; + parse_char_set_content (str_ind + 1) end_ind + | c' -> + if c = '%' then fail_single_percent str_ind; + (* note that '@' alone is accepted, as done by the legacy implementation; + the documentation specifically requires %@ so we could warn on that *) + add_char c; + parse_char_set_after_char (str_ind + 1) end_ind c' + + (* Manage range in char set (except if the '-' the last char before ']') *) + and parse_char_set_after_minus str_ind end_ind c = + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | ']' -> + add_char c; + add_char '-'; + str_ind + 1 + | '%' -> + if str_ind + 1 = end_ind then unexpected_end_of_format end_ind; + begin match str.[str_ind + 1] with + | ('%' | '@') as c' -> + add_range c c'; + parse_char_set_content (str_ind + 2) end_ind + | _ -> fail_single_percent str_ind + end + | c' -> + add_range c c'; + parse_char_set_content (str_ind + 1) end_ind + in + let str_ind, reverse = + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | '^' -> str_ind + 1, true + | _ -> str_ind, false in + let next_ind = parse_char_set_start str_ind end_ind in + let char_set = freeze_char_set char_set in + next_ind, (if reverse then rev_char_set char_set else char_set) + + (* Consume all next spaces, raise an Failure if end_ind is reached. *) + and parse_spaces str_ind end_ind = + if str_ind = end_ind then unexpected_end_of_format end_ind; + if str.[str_ind] = ' ' then parse_spaces (str_ind + 1) end_ind else str_ind + + (* Read a positive integer from the string, raise a Failure if end_ind is + reached. *) + and parse_positive str_ind end_ind acc = + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | '0' .. '9' as c -> + let new_acc = acc * 10 + (int_of_char c - int_of_char '0') in + if new_acc > Sys.max_string_length then + failwith_message + "invalid format %S: integer %d is greater than the limit %d" + str new_acc Sys.max_string_length + else + parse_positive (str_ind + 1) end_ind new_acc + | _ -> str_ind, acc + + (* Read a positive or negative integer from the string, raise a Failure + if end_ind is reached. *) + and parse_integer str_ind end_ind = + if str_ind = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind] with + | '0' .. '9' -> parse_positive str_ind end_ind 0 + | '-' -> ( + if str_ind + 1 = end_ind then unexpected_end_of_format end_ind; + match str.[str_ind + 1] with + | '0' .. '9' -> + let next_ind, n = parse_positive (str_ind + 1) end_ind 0 in + next_ind, -n + | c -> + expected_character (str_ind + 1) "digit" c + ) + | _ -> assert false + + (* Add a literal to a format from a literal character sub-sequence. *) + and add_literal : type a d e f . + int -> int -> (a, _, _, d, e, f) fmt -> + (_, _, e, f) fmt_ebb = + fun lit_start str_ind fmt -> match str_ind - lit_start with + | 0 -> Fmt_EBB fmt + | 1 -> Fmt_EBB (Char_literal (str.[lit_start], fmt)) + | size -> Fmt_EBB (String_literal (String.sub str lit_start size, fmt)) + + (* Search the end of the current sub-format + (i.e. the corresponding "%}" or "%)") *) + and search_subformat_end str_ind end_ind c = + if str_ind = end_ind then + failwith_message + "invalid format %S: unclosed sub-format, \ + expected \"%%%c\" at character number %d" str c end_ind; + match str.[str_ind] with + | '%' -> + if str_ind + 1 = end_ind then unexpected_end_of_format end_ind; + if str.[str_ind + 1] = c then (* End of format found *) str_ind else + begin match str.[str_ind + 1] with + | '_' -> + (* Search for "%_(" or "%_{". *) + if str_ind + 2 = end_ind then unexpected_end_of_format end_ind; + begin match str.[str_ind + 2] with + | '{' -> + let sub_end = search_subformat_end (str_ind + 3) end_ind '}' in + search_subformat_end (sub_end + 2) end_ind c + | '(' -> + let sub_end = search_subformat_end (str_ind + 3) end_ind ')' in + search_subformat_end (sub_end + 2) end_ind c + | _ -> search_subformat_end (str_ind + 3) end_ind c + end + | '{' -> + (* %{...%} sub-format found. *) + let sub_end = search_subformat_end (str_ind + 2) end_ind '}' in + search_subformat_end (sub_end + 2) end_ind c + | '(' -> + (* %(...%) sub-format found. *) + let sub_end = search_subformat_end (str_ind + 2) end_ind ')' in + search_subformat_end (sub_end + 2) end_ind c + | '}' -> + (* Error: %(...%}. *) + expected_character (str_ind + 1) "character ')'" '}'; + | ')' -> + (* Error: %{...%). *) + expected_character (str_ind + 1) "character '}'" ')'; + | _ -> + search_subformat_end (str_ind + 2) end_ind c + end + | _ -> search_subformat_end (str_ind + 1) end_ind c + + (* Check if symb is a valid int conversion after "%l", "%n" or "%L" *) + and is_int_base symb = match symb with + | 'd' | 'i' | 'x' | 'X' | 'o' | 'u' -> true + | _ -> false + + (* Convert a char (l, n or L) to its associated counter. *) + and counter_of_char symb = match symb with + | 'l' -> Line_counter | 'n' -> Char_counter + | 'L' -> Token_counter | _ -> assert false + + (* Convert (plus, symb) to its associated int_conv. *) + and compute_int_conv pct_ind str_ind plus sharp space symb = + match plus, sharp, space, symb with + | false, false, false, 'd' -> Int_d | false, false, false, 'i' -> Int_i + | false, false, true, 'd' -> Int_sd | false, false, true, 'i' -> Int_si + | true, false, false, 'd' -> Int_pd | true, false, false, 'i' -> Int_pi + | false, false, false, 'x' -> Int_x | false, false, false, 'X' -> Int_X + | false, true, false, 'x' -> Int_Cx | false, true, false, 'X' -> Int_CX + | false, false, false, 'o' -> Int_o + | false, true, false, 'o' -> Int_Co + | false, false, false, 'u' -> Int_u + | _, true, _, 'x' when legacy_behavior -> Int_Cx + | _, true, _, 'X' when legacy_behavior -> Int_CX + | _, true, _, 'o' when legacy_behavior -> Int_Co + | _, true, _, _ -> + if legacy_behavior then (* ignore *) + compute_int_conv pct_ind str_ind plus false space symb + else incompatible_flag pct_ind str_ind symb "'#'" + | true, false, true, _ -> + if legacy_behavior then + (* plus and space: legacy implementation prefers plus *) + compute_int_conv pct_ind str_ind plus sharp false symb + else incompatible_flag pct_ind str_ind ' ' "'+'" + | false, false, true, _ -> + if legacy_behavior then (* ignore *) + compute_int_conv pct_ind str_ind plus sharp false symb + else incompatible_flag pct_ind str_ind symb "' '" + | true, false, false, _ -> + if legacy_behavior then (* ignore *) + compute_int_conv pct_ind str_ind false sharp space symb + else incompatible_flag pct_ind str_ind symb "'+'" + | false, false, false, _ -> assert false + + (* Convert (plus, symb) to its associated float_conv. *) + and compute_float_conv pct_ind str_ind plus space symb = + match plus, space, symb with + | false, false, 'f' -> Float_f | false, false, 'e' -> Float_e + | false, true, 'f' -> Float_sf | false, true, 'e' -> Float_se + | true, false, 'f' -> Float_pf | true, false, 'e' -> Float_pe + | false, false, 'E' -> Float_E | false, false, 'g' -> Float_g + | false, true, 'E' -> Float_sE | false, true, 'g' -> Float_sg + | true, false, 'E' -> Float_pE | true, false, 'g' -> Float_pg + | false, false, 'G' -> Float_G + | false, true, 'G' -> Float_sG + | true, false, 'G' -> Float_pG + | false, false, 'F' -> Float_F + | true, true, _ -> + if legacy_behavior then + (* plus and space: legacy implementation prefers plus *) + compute_float_conv pct_ind str_ind plus false symb + else incompatible_flag pct_ind str_ind ' ' "'+'" + | false, true, _ -> + if legacy_behavior then (* ignore *) + compute_float_conv pct_ind str_ind plus false symb + else incompatible_flag pct_ind str_ind symb "' '" + | true, false, _ -> + if legacy_behavior then (* ignore *) + compute_float_conv pct_ind str_ind false space symb + else incompatible_flag pct_ind str_ind symb "'+'" + | false, false, _ -> assert false + + (* Raise a Failure with a friendly error message about incompatible options.*) + and incompatible_flag : type a . int -> int -> char -> string -> a = + fun pct_ind str_ind symb option -> + let subfmt = String.sub str pct_ind (str_ind - pct_ind) in + failwith_message + "invalid format %S: at character number %d, \ + %s is incompatible with '%c' in sub-format %S" + str pct_ind option symb subfmt; + + in parse 0 (String.length str) + +(******************************************************************************) + (* Guarded string to format conversions *) + +(* Convert a string to a format according to an fmtty. *) +(* Raise a Failure with an error message in case of type mismatch. *) +let format_of_string_fmtty str fmtty = + let Fmt_EBB fmt = fmt_ebb_of_string str in + try Format (type_format fmt fmtty, str) + with Type_mismatch -> + failwith_message + "bad input: format type mismatch between %S and %S" + str (string_of_fmtty fmtty) + +(* Convert a string to a format compatible with an other format. *) +(* Raise a Failure with an error message in case of type mismatch. *) +let format_of_string_format str (Format (fmt', str')) = + let Fmt_EBB fmt = fmt_ebb_of_string str in + try Format (type_format fmt (fmtty_of_fmt fmt'), str) + with Type_mismatch -> + failwith_message + "bad input: format type mismatch between %S and %S" str str' diff --git a/stdlib/camlinternalFormat.mli b/stdlib/camlinternalFormat.mli new file mode 100644 index 000000000..dd8da62d2 --- /dev/null +++ b/stdlib/camlinternalFormat.mli @@ -0,0 +1,104 @@ +(* No comments, OCaml stdlib internal use only. *) + +open CamlinternalFormatBasics + +val is_in_char_set : char_set -> char -> bool +val rev_char_set : char_set -> char_set + +type mutable_char_set = bytes +val create_char_set : unit -> mutable_char_set +val add_in_char_set : mutable_char_set -> char -> unit +val freeze_char_set : mutable_char_set -> char_set + +type ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb = Param_format_EBB : + ('x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb + +val param_format_of_ignored_format : + ('a, 'b, 'c, 'd, 'y, 'x) ignored -> ('x, 'b, 'c, 'y, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) param_format_ebb + +type ('b, 'c) acc_formatting_gen = + | Acc_open_tag of ('b, 'c) acc + | Acc_open_box of ('b, 'c) acc + +and ('b, 'c) acc = + | Acc_formatting_lit of ('b, 'c) acc * formatting_lit + | Acc_formatting_gen of ('b, 'c) acc * ('b, 'c) acc_formatting_gen + | Acc_string_literal of ('b, 'c) acc * string + | Acc_char_literal of ('b, 'c) acc * char + | Acc_data_string of ('b, 'c) acc * string + | Acc_data_char of ('b, 'c) acc * char + | Acc_delay of ('b, 'c) acc * ('b -> 'c) + | Acc_flush of ('b, 'c) acc + | Acc_invalid_arg of ('b, 'c) acc * string + | End_of_acc + +type ('a, 'b) heter_list = + | Cons : 'c * ('a, 'b) heter_list -> ('c -> 'a, 'b) heter_list + | Nil : ('b, 'b) heter_list + +type ('b, 'c, 'e, 'f) fmt_ebb = Fmt_EBB : + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt -> + ('b, 'c, 'e, 'f) fmt_ebb + +val make_printf : + ('b -> ('b, 'c) acc -> 'd) -> 'b -> ('b, 'c) acc -> + ('a, 'b, 'c, 'c, 'c, 'd) CamlinternalFormatBasics.fmt -> 'a + +val output_acc : out_channel -> (out_channel, unit) acc -> unit +val bufput_acc : Buffer.t -> (Buffer.t, unit) acc -> unit +val strput_acc : Buffer.t -> (unit, string) acc -> unit + +val type_format : + ('x, 'b, 'c, 't, 'u, 'v) CamlinternalFormatBasics.fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmtty -> + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt + +val fmt_ebb_of_string : ?legacy_behavior:bool -> string -> ('b, 'c, 'e, 'f) fmt_ebb +(* warning: the optional flag legacy_behavior is EXPERIMENTAL and will + be removed in the next version. You must not set it explicitly. It + is only used by the type-checker implementation. +*) + +val format_of_string_fmtty : + string -> + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmtty -> + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 + +val format_of_string_format : + string -> + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 -> + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 + +val char_of_iconv : CamlinternalFormatBasics.int_conv -> char +val string_of_formatting_lit : CamlinternalFormatBasics.formatting_lit -> string +val string_of_formatting_gen : + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.formatting_gen -> string + +val string_of_fmtty : + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmtty -> string +val string_of_fmt : + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt -> string + +val open_box_of_string : string -> int * block_type + +val symm : + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +-> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2, + 'a1, 'b1, 'c1, 'd1, 'e1, 'f1) fmtty_rel + +val trans : + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +-> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2, + 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel +-> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel + +val recast : + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1) fmt +-> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +-> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmt diff --git a/stdlib/camlinternalFormatBasics.ml b/stdlib/camlinternalFormatBasics.ml new file mode 100644 index 000000000..e51e4e2ce --- /dev/null +++ b/stdlib/camlinternalFormatBasics.ml @@ -0,0 +1,614 @@ +(* Padding position. *) +type padty = + | Left (* Text is left justified ('-' option). *) + | Right (* Text is right justified (no '-' option). *) + | Zeros (* Text is right justified by zeros (see '0' option). *) + +(***) + +(* Integer conversion. *) +type int_conv = + | Int_d | Int_pd | Int_sd (* %d | %+d | % d *) + | Int_i | Int_pi | Int_si (* %i | %+i | % i *) + | Int_x | Int_Cx (* %x | %#x *) + | Int_X | Int_CX (* %X | %#X *) + | Int_o | Int_Co (* %o | %#o *) + | Int_u (* %u *) + +(* Float conversion. *) +type float_conv = + | Float_f | Float_pf | Float_sf (* %f | %+f | % f *) + | Float_e | Float_pe | Float_se (* %e | %+e | % e *) + | Float_E | Float_pE | Float_sE (* %E | %+E | % E *) + | Float_g | Float_pg | Float_sg (* %g | %+g | % g *) + | Float_G | Float_pG | Float_sG (* %G | %+G | % G *) + | Float_F (* %F *) + +(***) + +(* Char sets (see %[...]) are bitmaps implemented as 32-char strings. *) +type char_set = string + +(***) + +(* Counter used in Scanf. *) +type counter = + | Line_counter (* %l *) + | Char_counter (* %n *) + | Token_counter (* %N, %L *) + +(***) + +(* Padding of strings and numbers. *) +type ('a, 'b) padding = + (* No padding (ex: "%d") *) + | No_padding : ('a, 'a) padding + (* Literal padding (ex: "%8d") *) + | Lit_padding : padty * int -> ('a, 'a) padding + (* Padding as extra argument (ex: "%*d") *) + | Arg_padding : padty -> (int -> 'a, 'a) padding + +(* Some formats, such as %_d, + only accept an optional number as padding option (no extra argument) *) +type pad_option = int option + +(* Precision of floats and '0'-padding of integers. *) +type ('a, 'b) precision = + (* No precision (ex: "%f") *) + | No_precision : ('a, 'a) precision + (* Literal precision (ex: "%.3f") *) + | Lit_precision : int -> ('a, 'a) precision + (* Precision as extra argument (ex: "%.*f") *) + | Arg_precision : (int -> 'a, 'a) precision + +(* Some formats, such as %_f, + only accept an optional number as precision option (no extra argument) *) +type prec_option = int option + +(***) + +(* Relational format types + +In the first format+gadts implementation, the type for %(..%) in the +fmt GADT was as follows: + +| Format_subst : (* %(...%) *) + pad_option * ('d1, 'q1, 'd2, 'q2) reader_nb_unifier * + ('x, 'b, 'c, 'd1, 'q1, 'u) fmtty * + ('u, 'b, 'c, 'q1, 'e1, 'f) fmt -> + (('x, 'b, 'c, 'd2, 'q2, 'u) format6 -> 'x, 'b, 'c, 'd1, 'e1, 'f) fmt + +Notice that the 'u parameter in 'f position in the format argument +(('x, .., 'u) format6 -> ..) is equal to the 'u parameter in 'a +position in the format tail (('u, .., 'f) fmt). This means that the +type of the expected format parameter depends of where the %(...%) +are in the format string: + + # Printf.printf "%(%)";; + - : (unit, out_channel, unit, '_a, '_a, unit) + CamlinternalFormatBasics.format6 -> unit + = <fun> + # Printf.printf "%(%)%d";; + - : (int -> unit, out_channel, unit, '_a, '_a, int -> unit) + CamlinternalFormatBasics.format6 -> int -> unit + = <fun> + +On the contrary, the legacy typer gives a clever type that does not +depend on the position of %(..%) in the format string. For example, +%(%) will have the polymorphic type ('a, 'b, 'c, 'd, 'd, 'a): it can +be concatenated to any format type, and only enforces the constraint +that its 'a and 'f parameters are equal (no format arguments) and 'd +and 'e are equal (no reader argument). + +The weakening of this parameter type in the GADT version broke user +code (in fact it essentially made %(...%) unusable except at the last +position of a format). In particular, the following would not work +anymore: + + fun sep -> + Format.printf "foo%(%)bar%(%)baz" sep sep + +As the type-checker would require two *incompatible* types for the %(%) +in different positions. + +The solution to regain a general type for %(..%) is to generalize this +technique, not only on the 'd, 'e parameters, but on all six +parameters of a format: we introduce a "relational" type + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +whose values are proofs that ('a1, .., 'f1) and ('a2, .., 'f2) morally +correspond to the same format type: 'a1 is obtained from 'f1,'b1,'c1 +in the exact same way that 'a2 is obtained from 'f2,'b2,'c2, etc. + +For example, the relation between two format types beginning with a Char +parameter is as follows: + +| Char_ty : (* %c *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (char -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + char -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + +In the general case, the term structure of fmtty_rel is (almost¹) +isomorphic to the fmtty of the previous implementation: every +constructor is re-read with a binary, relational type, instead of the +previous unary typing. fmtty can then be re-defined as the diagonal of +fmtty_rel: + + type ('a, 'b, 'c, 'd, 'e, 'f) fmtty = + ('a, 'b, 'c, 'd, 'e, 'f, + 'a, 'b, 'c, 'd, 'e, 'f) fmtty_rel + +Once we have this fmtty_rel type in place, we can give the more +general type to %(...%): + +| Format_subst : (* %(...%) *) + pad_option * + ('g, 'h, 'i, 'j, 'k, 'l, + 'g2, 'b, 'c, 'j2, 'd, 'a) fmtty_rel * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b, 'c, 'j2, 'e, 'f) fmt + +We accept any format (('g, 'h, 'i, 'j, 'k, 'l) format6) (this is +completely unrelated to the type of the current format), but also +require a proof that this format is in relation to another format that +is concatenable to the format tail. When executing a %(...%) format +(in camlinternalFormat.ml:make_printf or scanf.ml:make_scanf), we +transtype the format along this relation using the 'recast' function +to transpose between related format types. + + val recast : + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1) fmt + -> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + -> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmt + +NOTE ¹: the typing of Format_subst_ty requires not one format type, but +two, one to establish the link between the format argument and the +first six parameters, and the other for the link between the format +argumant and the last six parameters. + +| Format_subst_ty : (* %(...%) *) + ('g, 'h, 'i, 'j, 'k, 'l, + 'g1, 'b1, 'c1, 'j1, 'd1, 'a1) fmtty_rel * + ('g, 'h, 'i, 'j, 'k, 'l, + 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel * + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g1, 'b1, 'c1, 'j1, 'e1, 'f1, + ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel + +When we generate a format AST, we generate exactly the same witness +for both relations, and the witness-conversion functions in +camlinternalFormat do rely on this invariant. For example, the +function that proves that the relation is transitive + + val trans : + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + -> ('a2, 'b2, 'c2, 'd2, 'e2, 'f2, + 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel + -> ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a3, 'b3, 'c3, 'd3, 'e3, 'f3) fmtty_rel + +does assume that the two input have exactly the same term structure +(and is only every used for argument witnesses of the +Format_subst_ty constructor). +*) + +(* Type of a block used by the Format pretty-printer. *) +type block_type = + | Pp_hbox (* Horizontal block no line breaking *) + | Pp_vbox (* Vertical block each break leads to a new line *) + | Pp_hvbox (* Horizontal-vertical block: same as vbox, except if this block + is small enough to fit on a single line *) + | Pp_hovbox (* Horizontal or Vertical block: breaks lead to new line + only when necessary to print the content of the block *) + | Pp_box (* Horizontal or Indent block: breaks lead to new line + only when necessary to print the content of the block, or + when it leads to a new indentation of the current line *) + | Pp_fits (* Internal usage: when a block fits on a single line *) + +(* Formatting element used by the Format pretty-printter. *) +type formatting_lit = + | Close_box (* @] *) + | Close_tag (* @} *) + | Break of string * int * int (* @, | @ | @; | @;<> *) + | FFlush (* @? *) + | Force_newline (* @\n *) + | Flush_newline (* @. *) + | Magic_size of string * int (* @<n> *) + | Escaped_at (* @@ *) + | Escaped_percent (* @%% *) + | Scan_indic of char (* @X *) + +(* Formatting element used by the Format pretty-printter. *) +type ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen = + | Open_tag : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> (* @{ *) + ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen + | Open_box : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> (* @[ *) + ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen + +(***) + +(* List of format type elements. *) +(* In particular used to represent %(...%) and %{...%} contents. *) +and ('a, 'b, 'c, 'd, 'e, 'f) fmtty = + ('a, 'b, 'c, 'd, 'e, 'f, + 'a, 'b, 'c, 'd, 'e, 'f) fmtty_rel +and ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel = + | Char_ty : (* %c *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (char -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + char -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | String_ty : (* %s *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (string -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + string -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Int_ty : (* %d *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (int -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + int -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Int32_ty : (* %ld *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (int32 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + int32 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Nativeint_ty : (* %nd *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (nativeint -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + nativeint -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Int64_ty : (* %Ld *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (int64 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + int64 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Float_ty : (* %f *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (float -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + float -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Bool_ty : (* %B *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (bool -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + bool -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + + | Format_arg_ty : (* %{...%} *) + ('g, 'h, 'i, 'j, 'k, 'l) fmtty * + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Format_subst_ty : (* %(...%) *) + ('g, 'h, 'i, 'j, 'k, 'l, + 'g1, 'b1, 'c1, 'j1, 'd1, 'a1) fmtty_rel * + ('g, 'h, 'i, 'j, 'k, 'l, + 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel * + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g1, 'b1, 'c1, 'j1, 'e1, 'f1, + ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel + + (* Printf and Format specific constructors. *) + | Alpha_ty : (* %a *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('b1 -> 'x -> 'c1) -> 'x -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + ('b2 -> 'x -> 'c2) -> 'x -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + | Theta_ty : (* %t *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('b1 -> 'c1) -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + ('b2 -> 'c2) -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + + (* Scanf specific constructor. *) + | Reader_ty : (* %r *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + ('x -> 'a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1, + 'x -> 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel + | Ignored_reader_ty : (* %_r *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + ('a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel + + | End_of_fmtty : + ('f1, 'b1, 'c1, 'd1, 'd1, 'f1, + 'f2, 'b2, 'c2, 'd2, 'd2, 'f2) fmtty_rel + +(***) + +(* List of format elements. *) +and ('a, 'b, 'c, 'd, 'e, 'f) fmt = + | Char : (* %c *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + | Caml_char : (* %C *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + | String : (* %s *) + ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt + | Caml_string : (* %S *) + ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt + | Int : (* %[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, int -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt + | Int32 : (* %l[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, int32 -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt + | Nativeint : (* %n[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, nativeint -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt + | Int64 : (* %L[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, int64 -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt + | Float : (* %[feEgGF] *) + float_conv * ('x, 'y) padding * ('y, float -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt + | Bool : (* %[bB] *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (bool -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + | Flush : (* %! *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + + | String_literal : (* abc *) + string * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + | Char_literal : (* x *) + char * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + + | Format_arg : (* %{...%} *) + pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + | Format_subst : (* %(...%) *) + pad_option * + ('g, 'h, 'i, 'j, 'k, 'l, + 'g2, 'b, 'c, 'j2, 'd, 'a) fmtty_rel * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b, 'c, 'j2, 'e, 'f) fmt + + (* Printf and Format specific constructor. *) + | Alpha : (* %a *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('b -> 'x -> 'c) -> 'x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + | Theta : (* %t *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('b -> 'c) -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + + (* Format specific constructor: *) + | Formatting_lit : (* @_ *) + formatting_lit * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + | Formatting_gen : (* @_ *) + ('a1, 'b, 'c, 'd1, 'e1, 'f1) formatting_gen * + ('f1, 'b, 'c, 'e1, 'e2, 'f2) fmt -> ('a1, 'b, 'c, 'd1, 'e2, 'f2) fmt + + (* Scanf specific constructors: *) + | Reader : (* %r *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x -> 'a, 'b, 'c, ('b -> 'x) -> 'd, 'e, 'f) fmt + | Scan_char_set : (* %[...] *) + pad_option * char_set * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (string -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + | Scan_get_counter : (* %[nlNL] *) + counter * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (int -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + | Ignored_param : (* %_ *) + ('a, 'b, 'c, 'd, 'y, 'x) ignored * ('x, 'b, 'c, 'y, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + + | End_of_format : + ('f, 'b, 'c, 'e, 'e, 'f) fmt + +(***) + +(* Type for ignored parameters (see "%_"). *) +and ('a, 'b, 'c, 'd, 'e, 'f) ignored = + | Ignored_char : (* %_c *) + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_caml_char : (* %_C *) + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_string : (* %_s *) + pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_caml_string : (* %_S *) + pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_int : (* %_d *) + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_int32 : (* %_ld *) + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_nativeint : (* %_nd *) + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_int64 : (* %_Ld *) + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_float : (* %_f *) + pad_option * prec_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_bool : (* %_B *) + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_format_arg : (* %_{...%} *) + pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty -> + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_format_subst : (* %_(...%) *) + pad_option * ('a, 'b, 'c, 'd, 'e, 'f) fmtty -> + ('a, 'b, 'c, 'd, 'e, 'f) ignored + | Ignored_reader : (* %_r *) + ('a, 'b, 'c, ('b -> 'x) -> 'd, 'd, 'a) ignored + | Ignored_scan_char_set : (* %_[...] *) + pad_option * char_set -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_scan_get_counter : (* %_[nlNL] *) + counter -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + +and ('a, 'b, 'c, 'd, 'e, 'f) format6 = + Format of ('a, 'b, 'c, 'd, 'e, 'f) fmt * string + +let rec erase_rel : type a b c d e f g h i j k l . + (a, b, c, d, e, f, + g, h, i, j, k, l) fmtty_rel -> (a, b, c, d, e, f) fmtty += function + | Char_ty rest -> + Char_ty (erase_rel rest) + | String_ty rest -> + String_ty (erase_rel rest) + | Int_ty rest -> + Int_ty (erase_rel rest) + | Int32_ty rest -> + Int32_ty (erase_rel rest) + | Int64_ty rest -> + Int64_ty (erase_rel rest) + | Nativeint_ty rest -> + Nativeint_ty (erase_rel rest) + | Float_ty rest -> + Float_ty (erase_rel rest) + | Bool_ty rest -> + Bool_ty (erase_rel rest) + | Format_arg_ty (ty, rest) -> + Format_arg_ty (ty, erase_rel rest) + | Format_subst_ty (ty1, ty2, rest) -> + Format_subst_ty (ty1, ty1, erase_rel rest) + | Alpha_ty rest -> + Alpha_ty (erase_rel rest) + | Theta_ty rest -> + Theta_ty (erase_rel rest) + | Reader_ty rest -> + Reader_ty (erase_rel rest) + | Ignored_reader_ty rest -> + Ignored_reader_ty (erase_rel rest) + | End_of_fmtty -> End_of_fmtty + +(******************************************************************************) + (* Format type concatenation *) + +(* Concatenate two format types. *) +(* Used by: + * reader_nb_unifier_of_fmtty to count readers in an fmtty, + * Scanf.take_fmtty_format_readers to extract readers inside %(...%), + * CamlinternalFormat.fmtty_of_ignored_format to extract format type. *) + +(* +let rec concat_fmtty : type a b c d e f g h . + (a, b, c, d, e, f) fmtty -> + (f, b, c, e, g, h) fmtty -> + (a, b, c, d, g, h) fmtty = +*) +let rec concat_fmtty : + type a1 b1 c1 d1 e1 f1 + a2 b2 c2 d2 e2 f2 + g1 j1 g2 j2 + . + (g1, b1, c1, j1, d1, a1, + g2, b2, c2, j2, d2, a2) fmtty_rel -> + (a1, b1, c1, d1, e1, f1, + a2, b2, c2, d2, e2, f2) fmtty_rel -> + (g1, b1, c1, j1, e1, f1, + g2, b2, c2, j2, e2, f2) fmtty_rel = +fun fmtty1 fmtty2 -> match fmtty1 with + | Char_ty rest -> + Char_ty (concat_fmtty rest fmtty2) + | String_ty rest -> + String_ty (concat_fmtty rest fmtty2) + | Int_ty rest -> + Int_ty (concat_fmtty rest fmtty2) + | Int32_ty rest -> + Int32_ty (concat_fmtty rest fmtty2) + | Nativeint_ty rest -> + Nativeint_ty (concat_fmtty rest fmtty2) + | Int64_ty rest -> + Int64_ty (concat_fmtty rest fmtty2) + | Float_ty rest -> + Float_ty (concat_fmtty rest fmtty2) + | Bool_ty rest -> + Bool_ty (concat_fmtty rest fmtty2) + | Alpha_ty rest -> + Alpha_ty (concat_fmtty rest fmtty2) + | Theta_ty rest -> + Theta_ty (concat_fmtty rest fmtty2) + | Reader_ty rest -> + Reader_ty (concat_fmtty rest fmtty2) + | Ignored_reader_ty rest -> + Ignored_reader_ty (concat_fmtty rest fmtty2) + | Format_arg_ty (ty, rest) -> + Format_arg_ty (ty, concat_fmtty rest fmtty2) + | Format_subst_ty (ty1, ty2, rest) -> + Format_subst_ty (ty1, ty2, concat_fmtty rest fmtty2) + | End_of_fmtty -> fmtty2 + +(******************************************************************************) + (* Format concatenation *) + +(* Concatenate two formats. *) +let rec concat_fmt : type a b c d e f g h . + (a, b, c, d, e, f) fmt -> + (f, b, c, e, g, h) fmt -> + (a, b, c, d, g, h) fmt = +fun fmt1 fmt2 -> match fmt1 with + | String (pad, rest) -> + String (pad, concat_fmt rest fmt2) + | Caml_string (pad, rest) -> + Caml_string (pad, concat_fmt rest fmt2) + + | Int (iconv, pad, prec, rest) -> + Int (iconv, pad, prec, concat_fmt rest fmt2) + | Int32 (iconv, pad, prec, rest) -> + Int32 (iconv, pad, prec, concat_fmt rest fmt2) + | Nativeint (iconv, pad, prec, rest) -> + Nativeint (iconv, pad, prec, concat_fmt rest fmt2) + | Int64 (iconv, pad, prec, rest) -> + Int64 (iconv, pad, prec, concat_fmt rest fmt2) + | Float (fconv, pad, prec, rest) -> + Float (fconv, pad, prec, concat_fmt rest fmt2) + + | Char (rest) -> + Char (concat_fmt rest fmt2) + | Caml_char rest -> + Caml_char (concat_fmt rest fmt2) + | Bool rest -> + Bool (concat_fmt rest fmt2) + | Alpha rest -> + Alpha (concat_fmt rest fmt2) + | Theta rest -> + Theta (concat_fmt rest fmt2) + | Reader rest -> + Reader (concat_fmt rest fmt2) + | Flush rest -> + Flush (concat_fmt rest fmt2) + + | String_literal (str, rest) -> + String_literal (str, concat_fmt rest fmt2) + | Char_literal (chr, rest) -> + Char_literal (chr, concat_fmt rest fmt2) + + | Format_arg (pad, fmtty, rest) -> + Format_arg (pad, fmtty, concat_fmt rest fmt2) + | Format_subst (pad, fmtty, rest) -> + Format_subst (pad, fmtty, concat_fmt rest fmt2) + + | Scan_char_set (width_opt, char_set, rest) -> + Scan_char_set (width_opt, char_set, concat_fmt rest fmt2) + | Scan_get_counter (counter, rest) -> + Scan_get_counter (counter, concat_fmt rest fmt2) + | Ignored_param (ign, rest) -> + Ignored_param (ign, concat_fmt rest fmt2) + + | Formatting_lit (fmting_lit, rest) -> + Formatting_lit (fmting_lit, concat_fmt rest fmt2) + | Formatting_gen (fmting_gen, rest) -> + Formatting_gen (fmting_gen, concat_fmt rest fmt2) + + | End_of_format -> + fmt2 diff --git a/stdlib/camlinternalFormatBasics.mli b/stdlib/camlinternalFormatBasics.mli new file mode 100644 index 000000000..52f428ad8 --- /dev/null +++ b/stdlib/camlinternalFormatBasics.mli @@ -0,0 +1,287 @@ +(* No comments, OCaml stdlib internal use only. *) + +type padty = Left | Right | Zeros + +type int_conv = + | Int_d | Int_pd | Int_sd | Int_i | Int_pi | Int_si + | Int_x | Int_Cx | Int_X | Int_CX | Int_o | Int_Co | Int_u + +type float_conv = + | Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se + | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg + | Float_G | Float_pG | Float_sG | Float_F + +type char_set = string + +type counter = Line_counter | Char_counter | Token_counter + +type ('a, 'b) padding = + | No_padding : ('a, 'a) padding + | Lit_padding : padty * int -> ('a, 'a) padding + | Arg_padding : padty -> (int -> 'a, 'a) padding + +type pad_option = int option + +type ('a, 'b) precision = + | No_precision : ('a, 'a) precision + | Lit_precision : int -> ('a, 'a) precision + | Arg_precision : (int -> 'a, 'a) precision + +type prec_option = int option + +type block_type = Pp_hbox | Pp_vbox | Pp_hvbox | Pp_hovbox | Pp_box | Pp_fits + +type formatting_lit = + | Close_box + | Close_tag + | Break of string * int * int + | FFlush + | Force_newline + | Flush_newline + | Magic_size of string * int + | Escaped_at + | Escaped_percent + | Scan_indic of char + +type ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen = + | Open_tag : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> + ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen + | Open_box : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> + ('a, 'b, 'c, 'd, 'e, 'f) formatting_gen + +and ('a, 'b, 'c, 'd, 'e, 'f) fmtty = + ('a, 'b, 'c, 'd, 'e, 'f, + 'a, 'b, 'c, 'd, 'e, 'f) fmtty_rel +and ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel = +| Char_ty : (* %c *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (char -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + char -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| String_ty : (* %s *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (string -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + string -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Int_ty : (* %d *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (int -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + int -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Int32_ty : (* %ld *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (int32 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + int32 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Nativeint_ty : (* %nd *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (nativeint -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + nativeint -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Int64_ty : (* %Ld *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (int64 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + int64 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Float_ty : (* %f *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (float -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + float -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Bool_ty : (* %B *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (bool -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + bool -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Format_arg_ty : (* %{...%} *) + ('g, 'h, 'i, 'j, 'k, 'l) fmtty * + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Format_subst_ty : (* %(...%) *) + ('g, 'h, 'i, 'j, 'k, 'l, + 'g1, 'b1, 'c1, 'j1, 'd1, 'a1) fmtty_rel * + ('g, 'h, 'i, 'j, 'k, 'l, + 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel * + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g1, 'b1, 'c1, 'j1, 'e1, 'f1, + ('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel + +(* Printf and Format specific constructors. *) +| Alpha_ty : (* %a *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('b1 -> 'x -> 'c1) -> 'x -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + ('b2 -> 'x -> 'c2) -> 'x -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel +| Theta_ty : (* %t *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + (('b1 -> 'c1) -> 'a1, 'b1, 'c1, 'd1, 'e1, 'f1, + ('b2 -> 'c2) -> 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel + +(* Scanf specific constructor. *) +| Reader_ty : (* %r *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + ('x -> 'a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1, + 'x -> 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel +| Ignored_reader_ty : (* %_r *) + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + ('a1, 'b1, 'c1, ('b1 -> 'x) -> 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, ('b2 -> 'x) -> 'd2, 'e2, 'f2) fmtty_rel + +| End_of_fmtty : + ('f1, 'b1, 'c1, 'd1, 'd1, 'f1, + 'f2, 'b2, 'c2, 'd2, 'd2, 'f2) fmtty_rel + +(**) + +(** List of format elements. *) +and ('a, 'b, 'c, 'd, 'e, 'f) fmt = +| Char : (* %c *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt +| Caml_char : (* %C *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (char -> 'a, 'b, 'c, 'd, 'e, 'f) fmt +| String : (* %s *) + ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt +| Caml_string : (* %S *) + ('x, string -> 'a) padding * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt +| Int : (* %[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, int -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt +| Int32 : (* %l[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, int32 -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt +| Nativeint : (* %n[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, nativeint -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt +| Int64 : (* %L[dixXuo] *) + int_conv * ('x, 'y) padding * ('y, int64 -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt +| Float : (* %[feEgGF] *) + float_conv * ('x, 'y) padding * ('y, float -> 'a) precision * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x, 'b, 'c, 'd, 'e, 'f) fmt +| Bool : (* %[bB] *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (bool -> 'a, 'b, 'c, 'd, 'e, 'f) fmt +| Flush : (* %! *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + +| String_literal : (* abc *) + string * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt +| Char_literal : (* x *) + char * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + +| Format_arg : (* %{...%} *) + pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'a, 'b, 'c, 'd, 'e, 'f) fmt +| Format_subst : (* %(...%) *) + pad_option * + ('g, 'h, 'i, 'j, 'k, 'l, + 'g2, 'b, 'c, 'j2, 'd, 'a) fmtty_rel * + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('g, 'h, 'i, 'j, 'k, 'l) format6 -> 'g2, 'b, 'c, 'j2, 'e, 'f) fmt + +(* Printf and Format specific constructor. *) +| Alpha : (* %a *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('b -> 'x -> 'c) -> 'x -> 'a, 'b, 'c, 'd, 'e, 'f) fmt +| Theta : (* %t *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (('b -> 'c) -> 'a, 'b, 'c, 'd, 'e, 'f) fmt + +(* Format specific constructor: *) +| Formatting_lit : (* @_ *) + formatting_lit * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt +| Formatting_gen : (* @_ *) + ('a1, 'b, 'c, 'd1, 'e1, 'f1) formatting_gen * + ('f1, 'b, 'c, 'e1, 'e2, 'f2) fmt -> ('a1, 'b, 'c, 'd1, 'e2, 'f2) fmt + +(* Scanf specific constructors: *) +| Reader : (* %r *) + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('x -> 'a, 'b, 'c, ('b -> 'x) -> 'd, 'e, 'f) fmt +| Scan_char_set : (* %[...] *) + pad_option * char_set * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (string -> 'a, 'b, 'c, 'd, 'e, 'f) fmt +| Scan_get_counter : (* %[nlNL] *) + counter * ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + (int -> 'a, 'b, 'c, 'd, 'e, 'f) fmt +| Ignored_param : (* %_ *) + ('a, 'b, 'c, 'd, 'y, 'x) ignored * ('x, 'b, 'c, 'y, 'e, 'f) fmt -> + ('a, 'b, 'c, 'd, 'e, 'f) fmt + +| End_of_format : + ('f, 'b, 'c, 'e, 'e, 'f) fmt + +and ('a, 'b, 'c, 'd, 'e, 'f) ignored = + | Ignored_char : + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_caml_char : + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_string : + pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_caml_string : + pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_int : + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_int32 : + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_nativeint : + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_int64 : + int_conv * pad_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_float : + pad_option * prec_option -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_bool : + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_format_arg : + pad_option * ('g, 'h, 'i, 'j, 'k, 'l) fmtty -> + ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_format_subst : + pad_option * ('a, 'b, 'c, 'd, 'e, 'f) fmtty -> + ('a, 'b, 'c, 'd, 'e, 'f) ignored + | Ignored_reader : + ('a, 'b, 'c, ('b -> 'x) -> 'd, 'd, 'a) ignored + | Ignored_scan_char_set : + pad_option * char_set -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + | Ignored_scan_get_counter : + counter -> ('a, 'b, 'c, 'd, 'd, 'a) ignored + +and ('a, 'b, 'c, 'd, 'e, 'f) format6 = + Format of ('a, 'b, 'c, 'd, 'e, 'f) fmt * string + +val concat_fmtty : + ('g1, 'b1, 'c1, 'j1, 'd1, 'a1, + 'g2, 'b2, 'c2, 'j2, 'd2, 'a2) fmtty_rel -> + ('a1, 'b1, 'c1, 'd1, 'e1, 'f1, + 'a2, 'b2, 'c2, 'd2, 'e2, 'f2) fmtty_rel -> + ('g1, 'b1, 'c1, 'j1, 'e1, 'f1, + 'g2, 'b2, 'c2, 'j2, 'e2, 'f2) fmtty_rel + +val erase_rel : + ('a, 'b, 'c, 'd, 'e, 'f, + 'g, 'h, 'i, 'j, 'k, 'l) fmtty_rel -> ('a, 'b, 'c, 'd, 'e, 'f) fmtty + +val concat_fmt : + ('a, 'b, 'c, 'd, 'e, 'f) fmt -> + ('f, 'b, 'c, 'e, 'g, 'h) fmt -> + ('a, 'b, 'c, 'd, 'g, 'h) fmt diff --git a/stdlib/camlinternalOO.ml b/stdlib/camlinternalOO.ml index 872a56065..3c39c0b67 100644 --- a/stdlib/camlinternalOO.ml +++ b/stdlib/camlinternalOO.ml @@ -128,7 +128,7 @@ let rec fit_size n = let new_table pub_labels = incr table_count; let len = Array.length pub_labels in - let methods = Array.create (len*2+2) dummy_met in + let methods = Array.make (len*2+2) dummy_met in methods.(0) <- magic len; methods.(1) <- magic (fit_size len * Sys.word_size / 8 - 1); for i = 0 to len - 1 do methods.(i*2+3) <- magic pub_labels.(i) done; @@ -144,7 +144,7 @@ let new_table pub_labels = let resize array new_size = let old_size = Array.length array.methods in if new_size > old_size then begin - let new_buck = Array.create new_size dummy_met in + let new_buck = Array.make new_size dummy_met in Array.blit array.methods 0 new_buck 0 old_size; array.methods <- new_buck end @@ -267,7 +267,7 @@ let to_array arr = let new_methods_variables table meths vals = let meths = to_array meths in let nmeths = Array.length meths and nvals = Array.length vals in - let res = Array.create (nmeths + nvals) 0 in + let res = Array.make (nmeths + nvals) 0 in for i = 0 to nmeths - 1 do res.(i) <- get_method_label table meths.(i) done; diff --git a/stdlib/digest.ml b/stdlib/digest.ml index 2baf3dbfa..14cb4ebd9 100644 --- a/stdlib/digest.ml +++ b/stdlib/digest.ml @@ -23,11 +23,15 @@ external channel: in_channel -> int -> t = "caml_md5_chan" let string str = unsafe_string str 0 (String.length str) +let bytes b = string (Bytes.unsafe_to_string b) + let substring str ofs len = if ofs < 0 || len < 0 || ofs > String.length str - len then invalid_arg "Digest.substring" else unsafe_string str ofs len +let subbytes b ofs len = substring (Bytes.unsafe_to_string b) ofs len + let file filename = let ic = open_in_bin filename in let d = channel ic (-1) in @@ -35,23 +39,21 @@ let file filename = d let output chan digest = - output chan digest 0 16 + output_string chan digest -let input chan = - let digest = String.create 16 in - really_input chan digest 0 16; - digest +let input chan = really_input_string chan 16 -let char_hex n = Char.unsafe_chr (n + if n < 10 then Char.code '0' else (Char.code 'a' - 10)) +let char_hex n = + Char.unsafe_chr (n + if n < 10 then Char.code '0' else (Char.code 'a' - 10)) let to_hex d = - let result = String.create 32 in + let result = Bytes.create 32 in for i = 0 to 15 do let x = Char.code d.[i] in - String.unsafe_set result (i*2) (char_hex (x lsr 4)); - String.unsafe_set result (i*2+1) (char_hex (x land 0x0f)); + Bytes.unsafe_set result (i*2) (char_hex (x lsr 4)); + Bytes.unsafe_set result (i*2+1) (char_hex (x land 0x0f)); done; - result + Bytes.unsafe_to_string result let from_hex s = if String.length s <> 32 then raise (Invalid_argument "Digest.from_hex"); @@ -63,8 +65,8 @@ let from_hex s = | _ -> raise (Invalid_argument "Digest.from_hex") in let byte i = digit s.[i] lsl 4 + digit s.[i+1] in - let result = String.create 16 in + let result = Bytes.create 16 in for i = 0 to 15 do - result.[i] <- Char.chr (byte (2 * i)); + Bytes.set result i (Char.chr (byte (2 * i))); done; - result + Bytes.unsafe_to_string result diff --git a/stdlib/digest.mli b/stdlib/digest.mli index 7fa1f15d6..583d2a46b 100644 --- a/stdlib/digest.mli +++ b/stdlib/digest.mli @@ -36,10 +36,16 @@ val compare : t -> t -> int val string : string -> t (** Return the digest of the given string. *) +val bytes : bytes -> t +(** Return the digest of the given byte sequence. *) + val substring : string -> int -> int -> t (** [Digest.substring s ofs len] returns the digest of the substring - of [s] starting at character number [ofs] and containing [len] - characters. *) + of [s] starting at index [ofs] and containing [len] characters. *) + +val subbytes : bytes -> int -> int -> t +(** [Digest.subbytes s ofs len] returns the digest of the subsequence + of [s] starting at index [ofs] and containing [len] bytes. *) external channel : in_channel -> int -> t = "caml_md5_chan" (** If [len] is nonnegative, [Digest.channel ic len] reads [len] diff --git a/stdlib/filename.mli b/stdlib/filename.mli index c44c6d954..a4ea3aaab 100644 --- a/stdlib/filename.mli +++ b/stdlib/filename.mli @@ -117,14 +117,13 @@ val set_temp_dir_name : string -> unit @since 4.00.0 *) -val temp_dir_name : string -(** @deprecated The name of the initial temporary directory: +val temp_dir_name : string [@@ocaml.deprecated] +(** The name of the initial temporary directory: Under Unix, the value of the [TMPDIR] environment variable, or "/tmp" if the variable is not set. Under Windows, the value of the [TEMP] environment variable, or "." if the variable is not set. - This function is deprecated; {!Filename.get_temp_dir_name} should be - used instead. + @deprecated You should use {!Filename.get_temp_dir_name} instead. @since 3.09.1 *) diff --git a/stdlib/format.ml b/stdlib/format.ml index 02222932e..5e206e11f 100644 --- a/stdlib/format.ml +++ b/stdlib/format.ml @@ -29,6 +29,10 @@ external int_of_size : size -> int = "%identity" (* Tokens are one of the following : *) +type block_type + = CamlinternalFormatBasics.block_type + = Pp_hbox | Pp_vbox | Pp_hvbox | Pp_hovbox | Pp_box | Pp_fits + type pp_token = | Pp_text of string (* normal text *) | Pp_break of int * int (* complete break *) @@ -46,21 +50,7 @@ type pp_token = and tag = string -and block_type = -| Pp_hbox (* Horizontal block no line breaking *) -| Pp_vbox (* Vertical block each break leads to a new line *) -| Pp_hvbox (* Horizontal-vertical block: same as vbox, except if this block - is small enough to fit on a single line *) -| Pp_hovbox (* Horizontal or Vertical block: breaks lead to new line - only when necessary to print the content of the block *) -| Pp_box (* Horizontal or Indent block: breaks lead to new line - only when necessary to print the content of the block, or - when it leads to a new indentation of the current line *) -| Pp_fits (* Internal usage: when a block fits on a single line *) - -and tblock = - | Pp_tbox of int list ref (* Tabulation box *) -;; +and tblock = Pp_tbox of int list ref (* Tabulation box *) (* The Queue: contains all formatting elements. @@ -241,7 +231,6 @@ let pp_infinity = 1000000010;; let pp_output_string state s = state.pp_out_string s 0 (String.length s) and pp_output_newline state = state.pp_out_newline () and pp_output_spaces state n = state.pp_out_spaces n -;; (* To format a break, indenting a new line. *) let break_new_line state offset width = @@ -657,9 +646,7 @@ let pp_print_bool state b = pp_print_string state (string_of_bool b);; (* To format a char. *) let pp_print_char state c = - let s = String.create 1 in - s.[0] <- c; - pp_print_as state 1 s + pp_print_as state 1 (String.make 1 c) ;; (* Opening boxes. *) @@ -901,7 +888,7 @@ let rec display_blanks state n = ;; let pp_set_formatter_out_channel state os = - state.pp_out_string <- output os; + state.pp_out_string <- output_substring os; state.pp_out_flush <- (fun () -> flush os); state.pp_out_newline <- display_newline state; state.pp_out_spaces <- display_blanks state; @@ -967,7 +954,7 @@ let make_formatter output flush = ;; let formatter_of_out_channel oc = - make_formatter (output oc) (fun () -> flush oc) + make_formatter (output_substring oc) (fun () -> flush oc) ;; let formatter_of_buffer b = @@ -1071,309 +1058,106 @@ and set_tags = pp_set_tags std_formatter ;; - -(************************************************************** - - Printf implementation. - - **************************************************************) - -module Sformat = Printf.CamlinternalPr.Sformat;; -module Tformat = Printf.CamlinternalPr.Tformat;; - -(* Error messages when processing formats. *) - -(* Trailer: giving up at character number ... *) -let giving_up mess fmt i = - Printf.sprintf - "Format.fprintf: %s \'%s\', giving up at character number %d%s" - mess (Sformat.to_string fmt) i - (if i < Sformat.length fmt - then Printf.sprintf " (%c)." (Sformat.get fmt i) - else Printf.sprintf "%c" '.') -;; - -(* When an invalid format deserves a special error explanation. *) -let format_invalid_arg mess fmt i = invalid_arg (giving_up mess fmt i);; - -(* Standard invalid format. *) -let invalid_format fmt i = format_invalid_arg "bad format" fmt i;; - -(* Cannot find a valid integer into that format. *) -let invalid_integer fmt i = - invalid_arg (giving_up "bad integer specification" fmt i);; - -(* Finding an integer size out of a sub-string of the format. *) -let format_int_of_string fmt i s = - let sz = - try int_of_string s with - | Failure _ -> invalid_integer fmt i in - size_of_int sz -;; - -(* Getting strings out of buffers. *) -let get_buffer_out b = - let s = Buffer.contents b in - Buffer.reset b; - s -;; - -(* [ppf] is supposed to be a pretty-printer that outputs to buffer [b]: - to extract the contents of [ppf] as a string we flush [ppf] and get the - string out of [b]. *) -let string_out b ppf = - pp_flush_queue ppf false; - get_buffer_out b -;; - -(* Applies [printer] to a formatter that outputs on a fresh buffer, - then returns the resulting material. *) -let exstring printer arg = - let b = Buffer.create 512 in - let ppf = formatter_of_buffer b in - printer ppf arg; - string_out b ppf -;; - -(* To turn out a character accumulator into the proper string result. *) -let implode_rev s0 = function - | [] -> s0 - | l -> String.concat "" (List.rev (s0 :: l)) -;; - -(* [mkprintf] is the printf-like function generator: given the - - [to_s] flag that tells if we are printing into a string, - - the [get_out] function that has to be called to get a [ppf] function to - output onto, - it generates a [kprintf] function that takes as arguments a [k] - continuation function to be called at the end of formatting, - and a printing format string to print the rest of the arguments - according to the format string. - Regular [fprintf]-like functions of this module are obtained via partial - applications of [mkprintf]. *) -let mkprintf to_s get_out k fmt = - - (* [out] is global to this definition of [pr], and must be shared by all its - recursive calls (if any). *) - let out = get_out fmt in - let print_as = ref None in - let outc c = - match !print_as with - | None -> pp_print_char out c - | Some size -> - pp_print_as_size out size (String.make 1 c); - print_as := None - and outs s = - match !print_as with - | None -> pp_print_string out s - | Some size -> - pp_print_as_size out size s; - print_as := None - and flush out = pp_print_flush out () in - - let rec pr k n fmt v = - - let len = Sformat.length fmt in - - let rec doprn n i = - if i >= len then Obj.magic (k out) else - match Sformat.get fmt i with - | '%' -> - Tformat.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m - | '@' -> - let i = succ i in - if i >= len then invalid_format fmt i else - begin match Sformat.get fmt i with - | '[' -> - do_pp_open_box out n (succ i) - | ']' -> - pp_close_box out (); - doprn n (succ i) - | '{' -> - do_pp_open_tag out n (succ i) - | '}' -> - pp_close_tag out (); - doprn n (succ i) - | ' ' -> - pp_print_space out (); - doprn n (succ i) - | ',' -> - pp_print_cut out (); - doprn n (succ i) - | '?' -> - pp_print_flush out (); - doprn n (succ i) - | '.' -> - pp_print_newline out (); - doprn n (succ i) - | '\n' -> - pp_force_newline out (); - doprn n (succ i) - | ';' -> - do_pp_break out n (succ i) - | '<' -> - let got_size size n i = - print_as := Some size; - doprn n (skip_gt i) in - get_int n (succ i) got_size - | '@' -> - outc '@'; - doprn n (succ i) - | _ -> invalid_format fmt i - end - | c -> outc c; doprn n (succ i) - - and cont_s n s i = - outs s; doprn n i - and cont_a n printer arg i = - if to_s then - outs ((Obj.magic printer : unit -> _ -> string) () arg) - else - printer out arg; - doprn n i - and cont_t n printer i = - if to_s then - outs ((Obj.magic printer : unit -> string) ()) - else - printer out; - doprn n i - and cont_f n i = - flush out; doprn n i - and cont_m n xf i = - let m = - Sformat.add_int_index - (Tformat.count_printing_arguments_of_format xf) n in - pr (Obj.magic (fun _ -> doprn m i)) n xf v - - and get_int n i c = - if i >= len then invalid_integer fmt i else - match Sformat.get fmt i with - | ' ' -> get_int n (succ i) c - | '%' -> - let cont_s n s i = c (format_int_of_string fmt i s) n i - and cont_a _n _printer _arg i = invalid_integer fmt i - and cont_t _n _printer i = invalid_integer fmt i - and cont_f _n i = invalid_integer fmt i - and cont_m _n _sfmt i = invalid_integer fmt i in - Tformat.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m - | _ -> - let rec get j = - if j >= len then invalid_integer fmt j else - match Sformat.get fmt j with - | '0' .. '9' | '-' -> get (succ j) - | _ -> - let size = - if j = i then size_of_int 0 else - let s = Sformat.sub fmt (Sformat.index_of_int i) (j - i) in - format_int_of_string fmt j s in - c size n j in - get i - - and skip_gt i = - if i >= len then invalid_format fmt i else - match Sformat.get fmt i with - | ' ' -> skip_gt (succ i) - | '>' -> succ i - | _ -> invalid_format fmt i - - and get_box_kind i = - if i >= len then Pp_box, i else - match Sformat.get fmt i with - | 'h' -> - let i = succ i in - if i >= len then Pp_hbox, i else - begin match Sformat.get fmt i with - | 'o' -> - let i = succ i in - if i >= len then format_invalid_arg "bad box format" fmt i else - begin match Sformat.get fmt i with - | 'v' -> Pp_hovbox, succ i - | c -> - format_invalid_arg - ("bad box name ho" ^ String.make 1 c) fmt i - end - | 'v' -> Pp_hvbox, succ i - | _ -> Pp_hbox, i - end - | 'b' -> Pp_box, succ i - | 'v' -> Pp_vbox, succ i - | _ -> Pp_box, i - - and get_tag_name n i c = - let rec get accu n i j = - if j >= len then - c (implode_rev - (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) - accu) - n j else - match Sformat.get fmt j with - | '>' -> - c (implode_rev - (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) - accu) - n j - | '%' -> - let s0 = Sformat.sub fmt (Sformat.index_of_int i) (j - i) in - let cont_s n s i = get (s :: s0 :: accu) n i i - and cont_a n printer arg i = - let s = - if to_s - then (Obj.magic printer : unit -> _ -> string) () arg - else exstring printer arg in - get (s :: s0 :: accu) n i i - and cont_t n printer i = - let s = - if to_s - then (Obj.magic printer : unit -> string) () - else exstring (fun ppf () -> printer ppf) () in - get (s :: s0 :: accu) n i i - and cont_f _n i = - format_invalid_arg "bad tag name specification" fmt i - and cont_m _n _sfmt i = - format_invalid_arg "bad tag name specification" fmt i in - Tformat.scan_format fmt v n j cont_s cont_a cont_t cont_f cont_m - | _ -> get accu n i (succ j) in - get [] n i i - - and do_pp_break ppf n i = - if i >= len then begin pp_print_space ppf (); doprn n i end else - match Sformat.get fmt i with - | '<' -> - let rec got_nspaces nspaces n i = - get_int n i (got_offset nspaces) - and got_offset nspaces offset n i = - pp_print_break ppf (int_of_size nspaces) (int_of_size offset); - doprn n (skip_gt i) in - get_int n (succ i) got_nspaces - | _c -> pp_print_space ppf (); doprn n i - - and do_pp_open_box ppf n i = - if i >= len then begin pp_open_box_gen ppf 0 Pp_box; doprn n i end else - match Sformat.get fmt i with - | '<' -> - let kind, i = get_box_kind (succ i) in - let got_size size n i = - pp_open_box_gen ppf (int_of_size size) kind; - doprn n (skip_gt i) in - get_int n i got_size - | _c -> pp_open_box_gen ppf 0 Pp_box; doprn n i - - and do_pp_open_tag ppf n i = - if i >= len then begin pp_open_tag ppf ""; doprn n i end else - match Sformat.get fmt i with - | '<' -> - let got_name tag_name n i = - pp_open_tag ppf tag_name; - doprn n (skip_gt i) in - get_tag_name n (succ i) got_name - | _c -> pp_open_tag ppf ""; doprn n i in - - doprn n 0 in - - let kpr = pr k (Sformat.index_of_int 0) in - - Tformat.kapr kpr fmt -;; + (**************************************************************) + +let compute_tag output tag_acc = + let buf = Buffer.create 16 in + let ppf = formatter_of_buffer buf in + let () = output ppf tag_acc in + let () = pp_print_flush ppf () in + let len = Buffer.length buf in + if len < 2 then Buffer.contents buf + else Buffer.sub buf 1 (len - 2) + + (************************************************************** + + Defining continuations to be passed as arguments of + CamlinternalFormat.make_printf. + + **************************************************************) + +open CamlinternalFormatBasics +open CamlinternalFormat + +(* Interpret a formatting entity on a formatter. *) +let output_formatting_lit ppf fmting_lit = match fmting_lit with + | Close_box -> pp_close_box ppf () + | Close_tag -> pp_close_tag ppf () + | Break (_, width, offset) -> pp_print_break ppf width offset + | FFlush -> pp_print_flush ppf () + | Force_newline -> pp_force_newline ppf () + | Flush_newline -> pp_print_newline ppf () + | Magic_size (_, _) -> () + | Escaped_at -> pp_print_char ppf '@' + | Escaped_percent -> pp_print_char ppf '%' + | Scan_indic c -> pp_print_char ppf '@'; pp_print_char ppf c + +(* Recursively output an "accumulator" containing a reversed list of + printing entities (string, char, flus, ...) in an output_stream. *) +(* Differ from Printf.output_acc by the interpretation of formatting. *) +(* Used as a continuation of CamlinternalFormat.make_printf. *) +let rec output_acc ppf acc = match acc with + | Acc_string_literal (Acc_formatting_lit (p, Magic_size (_, size)), s) + | Acc_data_string (Acc_formatting_lit (p, Magic_size (_, size)), s) -> + output_acc ppf p; + pp_print_as_size ppf (size_of_int size) s; + | Acc_char_literal (Acc_formatting_lit (p, Magic_size (_, size)), c) + | Acc_data_char (Acc_formatting_lit (p, Magic_size (_, size)), c) -> + output_acc ppf p; + pp_print_as_size ppf (size_of_int size) (String.make 1 c); + | Acc_formatting_lit (p, f) -> + output_acc ppf p; + output_formatting_lit ppf f; + | Acc_formatting_gen (p, Acc_open_tag acc') -> + output_acc ppf p; + pp_open_tag ppf (compute_tag output_acc acc') + | Acc_formatting_gen (p, Acc_open_box acc') -> + let () = output_acc ppf p in + let (indent, bty) = open_box_of_string (compute_tag output_acc acc') in + pp_open_box_gen ppf indent bty + | Acc_string_literal (p, s) + | Acc_data_string (p, s) -> output_acc ppf p; pp_print_string ppf s; + | Acc_char_literal (p, c) + | Acc_data_char (p, c) -> output_acc ppf p; pp_print_char ppf c; + | Acc_delay (p, f) -> output_acc ppf p; f ppf; + | Acc_flush p -> output_acc ppf p; pp_print_flush ppf (); + | Acc_invalid_arg (p, msg) -> output_acc ppf p; invalid_arg msg; + | End_of_acc -> () + +(* Recursively output an "accumulator" containing a reversed list of + printing entities (string, char, flus, ...) in a buffer. *) +(* Differ from Printf.bufput_acc by the interpretation of formatting. *) +(* Used as a continuation of CamlinternalFormat.make_printf. *) +let rec strput_acc ppf acc = match acc with + | Acc_string_literal (Acc_formatting_lit (p, Magic_size (_, size)), s) + | Acc_data_string (Acc_formatting_lit (p, Magic_size (_, size)), s) -> + strput_acc ppf p; + pp_print_as_size ppf (size_of_int size) s; + | Acc_char_literal (Acc_formatting_lit (p, Magic_size (_, size)), c) + | Acc_data_char (Acc_formatting_lit (p, Magic_size (_, size)), c) -> + strput_acc ppf p; + pp_print_as_size ppf (size_of_int size) (String.make 1 c); + | Acc_delay (Acc_formatting_lit (p, Magic_size (_, size)), f) -> + strput_acc ppf p; + pp_print_as_size ppf (size_of_int size) (f ()); + | Acc_formatting_lit (p, f) -> + strput_acc ppf p; + output_formatting_lit ppf f; + | Acc_formatting_gen (p, Acc_open_tag acc') -> + strput_acc ppf p; + pp_open_tag ppf (compute_tag strput_acc acc') + | Acc_formatting_gen (p, Acc_open_box acc') -> + let () = strput_acc ppf p in + let (indent, bty) = open_box_of_string (compute_tag strput_acc acc') in + pp_open_box_gen ppf indent bty + | Acc_string_literal (p, s) + | Acc_data_string (p, s) -> strput_acc ppf p; pp_print_string ppf s; + | Acc_char_literal (p, c) + | Acc_data_char (p, c) -> strput_acc ppf p; pp_print_char ppf c; + | Acc_delay (p, f) -> strput_acc ppf p; pp_print_string ppf (f ()); + | Acc_flush p -> strput_acc ppf p; pp_print_flush ppf (); + | Acc_invalid_arg (p, msg) -> strput_acc ppf p; invalid_arg msg; + | End_of_acc -> () (************************************************************** @@ -1381,30 +1165,37 @@ let mkprintf to_s get_out k fmt = **************************************************************) -let kfprintf k ppf = mkprintf false (fun _ -> ppf) k;; -let ikfprintf k ppf = Tformat.kapr (fun _ _ -> Obj.magic (k ppf));; - -let fprintf ppf = kfprintf ignore ppf;; -let ifprintf ppf = ikfprintf ignore ppf;; -let printf fmt = fprintf std_formatter fmt;; -let eprintf fmt = fprintf err_formatter fmt;; - -let ksprintf k = +let kfprintf k o (Format (fmt, _)) = + make_printf (fun o acc -> output_acc o acc; k o) o End_of_acc fmt +let ikfprintf k x (Format (fmt, _)) = + make_printf (fun _ _ -> k x) x End_of_acc fmt + +let fprintf ppf fmt = kfprintf ignore ppf fmt +let ifprintf ppf fmt = ikfprintf ignore ppf fmt +let printf fmt = fprintf std_formatter fmt +let eprintf fmt = fprintf err_formatter fmt + +let ksprintf k (Format (fmt, _)) = + let k' () acc = + let b = Buffer.create 512 in + let ppf = formatter_of_buffer b in + strput_acc ppf acc; + pp_flush_queue ppf false; + k (Buffer.contents b) in + make_printf k' () End_of_acc fmt + +let sprintf fmt = + ksprintf (fun s -> s) fmt + +let asprintf (Format (fmt, _)) = let b = Buffer.create 512 in - let k ppf = k (string_out b ppf) in let ppf = formatter_of_buffer b in - let get_out _ = ppf in - mkprintf true get_out k -;; - -let sprintf fmt = ksprintf (fun s -> s) fmt;; - -let asprintf fmt = - let b = Buffer.create 512 in - let k ppf = string_out b ppf in - let ppf = formatter_of_buffer b in - let get_out _ = ppf in - mkprintf false get_out k fmt;; + let k' : (formatter -> (formatter, unit) acc -> string) + = fun ppf acc -> + output_acc ppf acc; + pp_flush_queue ppf false; + Buffer.contents b in + make_printf k' ppf End_of_acc fmt (************************************************************** @@ -1412,15 +1203,10 @@ let asprintf fmt = **************************************************************) -let kbprintf k b = - mkprintf false (fun _ -> formatter_of_buffer b) k -;; - (* Deprecated error prone function bprintf. *) -let bprintf b = - let k ppf = pp_flush_queue ppf false in - kbprintf k b -;; +let bprintf b (Format (fmt, _) : ('a, formatter, unit) format) = + let k ppf acc = output_acc ppf acc; pp_flush_queue ppf false in + make_printf k (formatter_of_buffer b) End_of_acc fmt (* Deprecated alias for ksprintf. *) let kprintf = ksprintf;; diff --git a/stdlib/format.mli b/stdlib/format.mli index e7cbe506e..b44fc0a94 100644 --- a/stdlib/format.mli +++ b/stdlib/format.mli @@ -714,14 +714,18 @@ val ksprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;; (** {6 Deprecated} *) -val bprintf : Buffer.t -> ('a, formatter, unit) format -> 'a;; +val bprintf : Buffer.t -> ('a, formatter, unit) format -> 'a + [@@ocaml.deprecated] +;; (** @deprecated This function is error prone. Do not use it. If you need to print to some buffer [b], you must first define a formatter writing to [b], using [let to_b = formatter_of_buffer b]; then use regular calls to [Format.fprintf] on formatter [to_b]. *) -val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;; +val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b + [@@ocaml.deprecated] +;; (** @deprecated An alias for [ksprintf]. *) val set_all_formatter_output_functions : @@ -730,6 +734,7 @@ val set_all_formatter_output_functions : newline:(unit -> unit) -> spaces:(int -> unit) -> unit +[@@ocaml.deprecated] ;; (** @deprecated Subsumed by [set_formatter_out_functions]. *) @@ -740,12 +745,14 @@ val get_all_formatter_output_functions : (unit -> unit) * (unit -> unit) * (int -> unit) +[@@ocaml.deprecated] ;; (** @deprecated Subsumed by [get_formatter_out_functions]. *) val pp_set_all_formatter_output_functions : formatter -> out:(string -> int -> int -> unit) -> flush:(unit -> unit) -> newline:(unit -> unit) -> spaces:(int -> unit) -> unit +[@@ocaml.deprecated] ;; (** @deprecated Subsumed by [pp_set_formatter_out_functions]. *) @@ -754,6 +761,7 @@ val pp_get_all_formatter_output_functions : formatter -> unit -> (string -> int -> int -> unit) * (unit -> unit) * (unit -> unit) * (int -> unit) +[@@ocaml.deprecated] ;; (** @deprecated Subsumed by [pp_get_formatter_out_functions]. *) diff --git a/stdlib/gc.mli b/stdlib/gc.mli index d2f2d9761..f86a1e687 100644 --- a/stdlib/gc.mli +++ b/stdlib/gc.mli @@ -83,7 +83,7 @@ type stat = type control = { mutable minor_heap_size : int; (** The size (in words) of the minor heap. Changing - this parameter will trigger a minor collection. Default: 32k. *) + this parameter will trigger a minor collection. Default: 256k. *) mutable major_heap_increment : int; (** How much to add to the major heap when increasing it. If this @@ -131,7 +131,7 @@ type control = mutable stack_limit : int; (** The maximum size of the stack (in words). This is only relevant to the byte-code runtime, as the native code runtime - uses the operating system's stack. Default: 256k. *) + uses the operating system's stack. Default: 1024k. *) mutable allocation_policy : int; (** The policy used for allocating in the heap. Possible @@ -215,6 +215,9 @@ val finalise : ('a -> unit) -> 'a -> unit before the values it depends upon. Of course, this becomes false if additional dependencies are introduced by assignments. + In the presence of multiple OCaml threads it should be assumed that + any particular finaliser may be executed in any of the threads. + Anything reachable from the closure of finalisation functions is considered reachable, so the following code will not work as expected: @@ -249,7 +252,7 @@ val finalise : ('a -> unit) -> 'a -> unit another copy is still in use by the program. - The results of calling {!String.make}, {!String.create}, + The results of calling {!String.make}, {!Bytes.make}, {!Bytes.create}, {!Array.make}, and {!Pervasives.ref} are guaranteed to be heap-allocated and non-constant except when the length argument is [0]. *) diff --git a/stdlib/genlex.ml b/stdlib/genlex.ml index dc80727df..4f52b1865 100644 --- a/stdlib/genlex.ml +++ b/stdlib/genlex.ml @@ -21,7 +21,7 @@ type token = (* The string buffering machinery *) -let initial_buffer = String.create 32 +let initial_buffer = Bytes.create 32 let buffer = ref initial_buffer let bufpos = ref 0 @@ -29,16 +29,16 @@ let bufpos = ref 0 let reset_buffer () = buffer := initial_buffer; bufpos := 0 let store c = - if !bufpos >= String.length !buffer then - begin - let newbuffer = String.create (2 * !bufpos) in - String.blit !buffer 0 newbuffer 0 !bufpos; buffer := newbuffer - end; - String.set !buffer !bufpos c; + if !bufpos >= Bytes.length !buffer then begin + let newbuffer = Bytes.create (2 * !bufpos) in + Bytes.blit !buffer 0 newbuffer 0 !bufpos; + buffer := newbuffer + end; + Bytes.set !buffer !bufpos c; incr bufpos let get_string () = - let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s + let s = Bytes.sub_string !buffer 0 !bufpos in buffer := initial_buffer; s (* The lexer *) diff --git a/stdlib/hashtbl.mli b/stdlib/hashtbl.mli index 5424df40d..0c3e4999f 100644 --- a/stdlib/hashtbl.mli +++ b/stdlib/hashtbl.mli @@ -187,6 +187,34 @@ val stats : ('a, 'b) t -> statistics (** {6 Functorial interface} *) +(** The functorial interface allows the use of specific comparison + and hash functions, either for performance/security concerns, + or because keys are not hashable/comparable with the polymorphic builtins. + + For instance, one might want to specialize a table for integer keys: + {[ + module IntHash = + struct + type t = int + let equal i j = i=j + let hash i = i land max_int + end + + module IntHashtbl = Hashtbl.Make(IntHash) + + let h = IntHashtbl.create 17 in + IntHashtbl.add h 12 "hello";; + ]} + + This creates a new module [IntHashtbl], with a new type ['a + IntHashtbl.t] of tables from [int] to ['a]. In this example, [h] + contains [string] values so its type is [string IntHashtbl.t]. + + Note that the new type ['a IntHashtbl.t] is not compatible with + the type [('a,'b) Hashtbl.t] of the generic interface. For + example, [Hashtbl.length h] would not type-check, you must use + [IntHashtbl.length]. +*) module type HashedType = sig diff --git a/stdlib/header.c b/stdlib/header.c index cb3d9953a..93cdfeb2d 100644 --- a/stdlib/header.c +++ b/stdlib/header.c @@ -133,7 +133,7 @@ static char * read_runtime_path(int fd) char buffer[TRAILER_SIZE]; static char runtime_path[MAXPATHLEN]; int num_sections, i; - uint32 path_size; + uint32_t path_size; long ofs; lseek(fd, (long) -TRAILER_SIZE, SEEK_END); diff --git a/stdlib/lazy.mli b/stdlib/lazy.mli index 6108a715c..6ade2e3d4 100644 --- a/stdlib/lazy.mli +++ b/stdlib/lazy.mli @@ -75,11 +75,11 @@ val is_val : 'a t -> bool;; did not raise an exception. @since 4.00.0 *) -val lazy_from_fun : (unit -> 'a) -> 'a t;; +val lazy_from_fun : (unit -> 'a) -> 'a t [@@ocaml.deprecated];; (** @deprecated synonym for [from_fun]. *) -val lazy_from_val : 'a -> 'a t;; +val lazy_from_val : 'a -> 'a t [@@ocaml.deprecated];; (** @deprecated synonym for [from_val]. *) -val lazy_is_val : 'a t -> bool;; +val lazy_is_val : 'a t -> bool [@@ocaml.deprecated];; (** @deprecated synonym for [is_val]. *) diff --git a/stdlib/lexing.ml b/stdlib/lexing.ml index 53748ad86..d2231fbe9 100644 --- a/stdlib/lexing.ml +++ b/stdlib/lexing.ml @@ -29,7 +29,7 @@ let dummy_pos = { type lexbuf = { refill_buff : lexbuf -> unit; - mutable lex_buffer : string; + mutable lex_buffer : bytes; mutable lex_buffer_len : int; mutable lex_abs_pos : int; mutable lex_start_pos : int; @@ -81,7 +81,7 @@ let new_engine tbl state buf = let lex_refill read_fun aux_buffer lexbuf = let read = - read_fun aux_buffer (String.length aux_buffer) in + read_fun aux_buffer (Bytes.length aux_buffer) in let n = if read > 0 then read @@ -90,16 +90,16 @@ let lex_refill read_fun aux_buffer lexbuf = <-------|---------------------|-----------> | junk | valid data | junk | ^ ^ ^ ^ - 0 start_pos buffer_end String.length buffer + 0 start_pos buffer_end Bytes.length buffer *) - if lexbuf.lex_buffer_len + n > String.length lexbuf.lex_buffer then begin + if lexbuf.lex_buffer_len + n > Bytes.length lexbuf.lex_buffer then begin (* There is not enough space at the end of the buffer *) if lexbuf.lex_buffer_len - lexbuf.lex_start_pos + n - <= String.length lexbuf.lex_buffer + <= Bytes.length lexbuf.lex_buffer then begin (* But there is enough space if we reclaim the junk at the beginning of the buffer *) - String.blit lexbuf.lex_buffer lexbuf.lex_start_pos + Bytes.blit lexbuf.lex_buffer lexbuf.lex_start_pos lexbuf.lex_buffer 0 (lexbuf.lex_buffer_len - lexbuf.lex_start_pos) end else begin @@ -107,12 +107,12 @@ let lex_refill read_fun aux_buffer lexbuf = space since n <= String.length aux_buffer <= String.length buffer. Watch out for string length overflow, though. *) let newlen = - min (2 * String.length lexbuf.lex_buffer) Sys.max_string_length in + min (2 * Bytes.length lexbuf.lex_buffer) Sys.max_string_length in if lexbuf.lex_buffer_len - lexbuf.lex_start_pos + n > newlen then failwith "Lexing.lex_refill: cannot grow buffer"; - let newbuf = String.create newlen in + let newbuf = Bytes.create newlen in (* Copy the valid data to the beginning of the new buffer *) - String.blit lexbuf.lex_buffer lexbuf.lex_start_pos + Bytes.blit lexbuf.lex_buffer lexbuf.lex_start_pos newbuf 0 (lexbuf.lex_buffer_len - lexbuf.lex_start_pos); lexbuf.lex_buffer <- newbuf @@ -133,9 +133,7 @@ let lex_refill read_fun aux_buffer lexbuf = done end; (* There is now enough space at the end of the buffer *) - String.blit aux_buffer 0 - lexbuf.lex_buffer lexbuf.lex_buffer_len - n; + Bytes.blit aux_buffer 0 lexbuf.lex_buffer lexbuf.lex_buffer_len n; lexbuf.lex_buffer_len <- lexbuf.lex_buffer_len + n let zero_pos = { @@ -146,8 +144,8 @@ let zero_pos = { };; let from_function f = - { refill_buff = lex_refill f (String.create 512); - lex_buffer = String.create 1024; + { refill_buff = lex_refill f (Bytes.create 512); + lex_buffer = Bytes.create 1024; lex_buffer_len = 0; lex_abs_pos = 0; lex_start_pos = 0; @@ -165,7 +163,8 @@ let from_channel ic = let from_string s = { refill_buff = (fun lexbuf -> lexbuf.lex_eof_reached <- true); - lex_buffer = s ^ ""; + lex_buffer = Bytes.of_string s; (* have to make a copy for compatibility + with unsafe-string mode *) lex_buffer_len = String.length s; lex_abs_pos = 0; lex_start_pos = 0; @@ -180,37 +179,31 @@ let from_string s = let lexeme lexbuf = let len = lexbuf.lex_curr_pos - lexbuf.lex_start_pos in - let s = String.create len in - String.unsafe_blit lexbuf.lex_buffer lexbuf.lex_start_pos s 0 len; - s + Bytes.sub_string lexbuf.lex_buffer lexbuf.lex_start_pos len let sub_lexeme lexbuf i1 i2 = let len = i2-i1 in - let s = String.create len in - String.unsafe_blit lexbuf.lex_buffer i1 s 0 len; - s + Bytes.sub_string lexbuf.lex_buffer i1 len let sub_lexeme_opt lexbuf i1 i2 = if i1 >= 0 then begin let len = i2-i1 in - let s = String.create len in - String.unsafe_blit lexbuf.lex_buffer i1 s 0 len; - Some s + Some (Bytes.sub_string lexbuf.lex_buffer i1 len) end else begin None end -let sub_lexeme_char lexbuf i = lexbuf.lex_buffer.[i] +let sub_lexeme_char lexbuf i = Bytes.get lexbuf.lex_buffer i let sub_lexeme_char_opt lexbuf i = if i >= 0 then - Some lexbuf.lex_buffer.[i] + Some (Bytes.get lexbuf.lex_buffer i) else None let lexeme_char lexbuf i = - String.get lexbuf.lex_buffer (lexbuf.lex_start_pos + i) + Bytes.get lexbuf.lex_buffer (lexbuf.lex_start_pos + i) let lexeme_start lexbuf = lexbuf.lex_start_p.pos_cnum;; let lexeme_end lexbuf = lexbuf.lex_curr_p.pos_cnum;; diff --git a/stdlib/lexing.mli b/stdlib/lexing.mli index 6d5406d69..30898670b 100644 --- a/stdlib/lexing.mli +++ b/stdlib/lexing.mli @@ -46,7 +46,7 @@ val dummy_pos : position;; type lexbuf = { refill_buff : lexbuf -> unit; - mutable lex_buffer : string; + mutable lex_buffer : bytes; mutable lex_buffer_len : int; mutable lex_abs_pos : int; mutable lex_start_pos : int; @@ -84,12 +84,12 @@ val from_string : string -> lexbuf the string. An end-of-input condition is generated when the end of the string is reached. *) -val from_function : (string -> int -> int) -> lexbuf +val from_function : (bytes -> int -> int) -> lexbuf (** Create a lexer buffer with the given function as its reading method. When the scanner needs more characters, it will call the given - function, giving it a character string [s] and a character - count [n]. The function should put [n] characters or less in [s], - starting at character number 0, and return the number of characters + function, giving it a byte sequence [s] and a byte + count [n]. The function should put [n] bytes or fewer in [s], + starting at index 0, and return the number of bytes provided. A return value of 0 means end of input. *) diff --git a/stdlib/list.mli b/stdlib/list.mli index 5b88f229d..b53a63c64 100644 --- a/stdlib/list.mli +++ b/stdlib/list.mli @@ -112,14 +112,14 @@ val fold_right : ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b val iter2 : ('a -> 'b -> unit) -> 'a list -> 'b list -> unit (** [List.iter2 f [a1; ...; an] [b1; ...; bn]] calls in turn [f a1 b1; ...; f an bn]. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list (** [List.map2 f [a1; ...; an] [b1; ...; bn]] is [[f a1 b1; ...; f an bn]]. - Raise [Invalid_argument] if the two lists have - different lengths. Not tail-recursive. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. Not tail-recursive. *) val rev_map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list (** [List.rev_map2 f l1 l2] gives the same result as @@ -129,14 +129,14 @@ val rev_map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list val fold_left2 : ('a -> 'b -> 'c -> 'a) -> 'a -> 'b list -> 'c list -> 'a (** [List.fold_left2 f a [b1; ...; bn] [c1; ...; cn]] is [f (... (f (f a b1 c1) b2 c2) ...) bn cn]. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val fold_right2 : ('a -> 'b -> 'c -> 'c) -> 'a list -> 'b list -> 'c -> 'c (** [List.fold_right2 f [a1; ...; an] [b1; ...; bn] c] is [f a1 b1 (f a2 b2 (... (f an bn c) ...))]. - Raise [Invalid_argument] if the two lists have - different lengths. Not tail-recursive. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. Not tail-recursive. *) (** {6 List scanning} *) @@ -154,13 +154,13 @@ val exists : ('a -> bool) -> 'a list -> bool val for_all2 : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool (** Same as {!List.for_all}, but for a two-argument predicate. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val exists2 : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool (** Same as {!List.exists}, but for a two-argument predicate. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val mem : 'a -> 'a list -> bool (** [mem a l] is true if and only if [a] is equal diff --git a/stdlib/listLabels.mli b/stdlib/listLabels.mli index 8cf651471..45e3c41ea 100644 --- a/stdlib/listLabels.mli +++ b/stdlib/listLabels.mli @@ -112,14 +112,14 @@ val fold_right : f:('a -> 'b -> 'b) -> 'a list -> init:'b -> 'b val iter2 : f:('a -> 'b -> unit) -> 'a list -> 'b list -> unit (** [List.iter2 f [a1; ...; an] [b1; ...; bn]] calls in turn [f a1 b1; ...; f an bn]. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val map2 : f:('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list (** [List.map2 f [a1; ...; an] [b1; ...; bn]] is [[f a1 b1; ...; f an bn]]. - Raise [Invalid_argument] if the two lists have - different lengths. Not tail-recursive. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. Not tail-recursive. *) val rev_map2 : f:('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list (** [List.rev_map2 f l1 l2] gives the same result as @@ -130,15 +130,15 @@ val fold_left2 : f:('a -> 'b -> 'c -> 'a) -> init:'a -> 'b list -> 'c list -> 'a (** [List.fold_left2 f a [b1; ...; bn] [c1; ...; cn]] is [f (... (f (f a b1 c1) b2 c2) ...) bn cn]. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val fold_right2 : f:('a -> 'b -> 'c -> 'c) -> 'a list -> 'b list -> init:'c -> 'c (** [List.fold_right2 f [a1; ...; an] [b1; ...; bn] c] is [f a1 b1 (f a2 b2 (... (f an bn c) ...))]. - Raise [Invalid_argument] if the two lists have - different lengths. Not tail-recursive. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. Not tail-recursive. *) (** {6 List scanning} *) @@ -156,13 +156,13 @@ val exists : f:('a -> bool) -> 'a list -> bool val for_all2 : f:('a -> 'b -> bool) -> 'a list -> 'b list -> bool (** Same as {!ListLabels.for_all}, but for a two-argument predicate. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val exists2 : f:('a -> 'b -> bool) -> 'a list -> 'b list -> bool (** Same as {!ListLabels.exists}, but for a two-argument predicate. - Raise [Invalid_argument] if the two lists have - different lengths. *) + Raise [Invalid_argument] if the two lists are determined + to have different lengths. *) val mem : 'a -> set:'a list -> bool (** [mem a l] is true if and only if [a] is equal diff --git a/stdlib/map.mli b/stdlib/map.mli index 6dd371b52..abf453161 100644 --- a/stdlib/map.mli +++ b/stdlib/map.mli @@ -19,6 +19,26 @@ All operations over maps are purely applicative (no side-effects). The implementation uses balanced binary trees, and therefore searching and insertion take time logarithmic in the size of the map. + + For instance: + {[ + module IntPairs = + struct + type t = int * int + let compare (x0,y0) (x1,y1) = + match Pervasives.compare x0 x1 with + 0 -> Pervasives.compare y0 y1 + | c -> c + end + + module PairsMap = Map.Make(IntPairs) + + let m = PairsMap.(empty |> add (0,1) "hello" |> add (1,0) "world") + ]} + + This creates a new module [PairsMap], with a new type ['a PairsMap.t] + of maps from [int * int] to ['a]. In this example, [m] contains [string] + values so its type is [string PairsMap.t]. *) module type OrderedType = diff --git a/stdlib/marshal.ml b/stdlib/marshal.ml index 7a65a16a3..415559571 100644 --- a/stdlib/marshal.ml +++ b/stdlib/marshal.ml @@ -19,35 +19,48 @@ type extern_flags = external to_channel: out_channel -> 'a -> extern_flags list -> unit = "caml_output_value" +external to_bytes: 'a -> extern_flags list -> bytes + = "caml_output_value_to_string" external to_string: 'a -> extern_flags list -> string = "caml_output_value_to_string" external to_buffer_unsafe: - string -> int -> int -> 'a -> extern_flags list -> int + bytes -> int -> int -> 'a -> extern_flags list -> int = "caml_output_value_to_buffer" let to_buffer buff ofs len v flags = - if ofs < 0 || len < 0 || ofs > String.length buff - len + if ofs < 0 || len < 0 || ofs > Bytes.length buff - len then invalid_arg "Marshal.to_buffer: substring out of bounds" else to_buffer_unsafe buff ofs len v flags +(* The functions below use byte sequences as input, never using any + mutation. It makes sense to use non-mutated [bytes] rather than + [string], because we really work with sequences of bytes, not + a text representation. +*) + external from_channel: in_channel -> 'a = "caml_input_value" -external from_string_unsafe: string -> int -> 'a +external from_bytes_unsafe: bytes -> int -> 'a = "caml_input_value_from_string" -external data_size_unsafe: string -> int -> int = "caml_marshal_data_size" +external data_size_unsafe: bytes -> int -> int = "caml_marshal_data_size" let header_size = 20 let data_size buff ofs = - if ofs < 0 || ofs > String.length buff - header_size + if ofs < 0 || ofs > Bytes.length buff - header_size then invalid_arg "Marshal.data_size" else data_size_unsafe buff ofs let total_size buff ofs = header_size + data_size buff ofs -let from_string buff ofs = - if ofs < 0 || ofs > String.length buff - header_size - then invalid_arg "Marshal.from_size" +let from_bytes buff ofs = + if ofs < 0 || ofs > Bytes.length buff - header_size + then invalid_arg "Marshal.from_bytes" else begin let len = data_size_unsafe buff ofs in - if ofs > String.length buff - (header_size + len) - then invalid_arg "Marshal.from_string" - else from_string_unsafe buff ofs + if ofs > Bytes.length buff - (header_size + len) + then invalid_arg "Marshal.from_bytes" + else from_bytes_unsafe buff ofs end + +let from_string buff ofs = + (* Bytes.unsafe_of_string is safe here, as the produced byte + sequence is never mutated *) + from_bytes (Bytes.unsafe_of_string buff) ofs diff --git a/stdlib/marshal.mli b/stdlib/marshal.mli index f12af9fd9..9dfdd1624 100644 --- a/stdlib/marshal.mli +++ b/stdlib/marshal.mli @@ -33,12 +33,13 @@ Anything can happen at run-time if the object in the file does not belong to the given type. - OCaml exception values (of type [exn]) returned by the unmarhsaller - should not be pattern-matched over through [match ... with] or [try - ... with], because unmarshalling does not preserve the information - required for matching their exception constructor. Structural - equalities with other exception values, or most other uses such as - Printexc.to_string, will still work as expected. + Values of extensible variant types, for example exceptions (of + extensible type [exn]), returned by the unmarhsaller should not be + pattern-matched over through [match ... with] or [try ... with], + because unmarshalling does not preserve the information required for + matching their constructors. Structural equalities with other + extensible variant values does not work either. Most other uses such + as Printexc.to_string, will still work as expected. The representation of marshaled values is not human-readable, and uses bytes that are not printable characters. Therefore, @@ -47,7 +48,7 @@ [open_out_bin] or [open_in_bin]; channels opened in text mode will cause unmarshaling errors on platforms where text channels behave differently than binary channels, e.g. Windows. -*) + *) type extern_flags = No_sharing (** Don't preserve sharing *) @@ -74,17 +75,26 @@ val to_channel : out_channel -> 'a -> extern_flags list -> unit byte representations if [v] actually contains sharing, or even non-termination if [v] contains cycles. - If [flags] does not contain [Marshal.Closures], - marshaling fails when it encounters a functional value - inside [v]: only 'pure' data structures, containing neither - functions nor objects, can safely be transmitted between - different programs. If [flags] contains [Marshal.Closures], - functional values will be marshaled as a position in the code - of the program. In this case, the output of marshaling can - only be read back in processes that run exactly the same program, - with exactly the same compiled code. (This is checked - at un-marshaling time, using an MD5 digest of the code - transmitted along with the code position.) + If [flags] does not contain [Marshal.Closures], marshaling fails + when it encounters a functional value inside [v]: only 'pure' data + structures, containing neither functions nor objects, can safely be + transmitted between different programs. If [flags] contains + [Marshal.Closures], functional values will be marshaled as a the + position in the code of the program together with the values + corresponding to the free variables captured in the closure. In + this case, the output of marshaling can only be read back in + processes that run exactly the same program, with exactly the same + compiled code. (This is checked at un-marshaling time, using an MD5 + digest of the code transmitted along with the code position.) + + The exact definition of which free variables are captured in a + closure is not specified and can very between bytecode and native + code (and according to optimization flags). In particular, a + function value accessing a global reference may or may not include + the reference in its closure. If it does, unmarshaling the + corresponding closure will create a new reference, different from + the global one. + If [flags] contains [Marshal.Compat_32], marshaling fails when it encounters an integer value outside the range [[-2{^30}, 2{^30}-1]] @@ -99,19 +109,24 @@ val to_channel : out_channel -> 'a -> extern_flags list -> unit it has no effect if marshaling is performed on a 32-bit platform. *) -external to_string : - 'a -> extern_flags list -> string = "caml_output_value_to_string" -(** [Marshal.to_string v flags] returns a string containing - the representation of [v] as a sequence of bytes. +external to_bytes : + 'a -> extern_flags list -> bytes = "caml_output_value_to_string" +(** [Marshal.to_bytes v flags] returns a byte sequence containing + the representation of [v]. The [flags] argument has the same meaning as for {!Marshal.to_channel}. *) -val to_buffer : string -> int -> int -> 'a -> extern_flags list -> int +external to_string : + 'a -> extern_flags list -> string = "caml_output_value_to_string" +(** Same as [to_bytes] but return the result as a string instead of + a byte sequence. *) + +val to_buffer : bytes -> int -> int -> 'a -> extern_flags list -> int (** [Marshal.to_buffer buff ofs len v flags] marshals the value [v], - storing its byte representation in the string [buff], - starting at character number [ofs], and writing at most - [len] characters. It returns the number of characters - actually written to the string. If the byte representation + storing its byte representation in the sequence [buff], + starting at index [ofs], and writing at most + [len] bytes. It returns the number of bytes + actually written to the sequence. If the byte representation of [v] does not fit in [len] characters, the exception [Failure] is raised. *) @@ -121,36 +136,41 @@ val from_channel : in_channel -> 'a one of the [Marshal.to_*] functions, and reconstructs and returns the corresponding value.*) -val from_string : string -> int -> 'a -(** [Marshal.from_string buff ofs] unmarshals a structured value +val from_bytes : bytes -> int -> 'a +(** [Marshal.from_bytes buff ofs] unmarshals a structured value like {!Marshal.from_channel} does, except that the byte representation is not read from a channel, but taken from - the string [buff], starting at position [ofs]. *) + the byte sequence [buff], starting at position [ofs]. + The byte sequence is not mutated. *) + +val from_string : string -> int -> 'a +(** Same as [from_bytes] but take a string as argument instead of a + byte sequence. *) val header_size : int (** The bytes representing a marshaled value are composed of a fixed-size header and a variable-sized data part, whose size can be determined from the header. - {!Marshal.header_size} is the size, in characters, of the header. - {!Marshal.data_size}[ buff ofs] is the size, in characters, + {!Marshal.header_size} is the size, in bytes, of the header. + {!Marshal.data_size}[ buff ofs] is the size, in bytes, of the data part, assuming a valid header is stored in [buff] starting at position [ofs]. Finally, {!Marshal.total_size} [buff ofs] is the total size, - in characters, of the marshaled value. + in bytes, of the marshaled value. Both {!Marshal.data_size} and {!Marshal.total_size} raise [Failure] if [buff], [ofs] does not contain a valid header. To read the byte representation of a marshaled value into - a string buffer, the program needs to read first - {!Marshal.header_size} characters into the buffer, + a byte sequence, the program needs to read first + {!Marshal.header_size} bytes into the sequence, then determine the length of the remainder of the representation using {!Marshal.data_size}, - make sure the buffer is large enough to hold the remaining - data, then read it, and finally call {!Marshal.from_string} + make sure the sequence is large enough to hold the remaining + data, then read it, and finally call {!Marshal.from_bytes} to unmarshal the value. *) -val data_size : string -> int -> int +val data_size : bytes -> int -> int (** See {!Marshal.header_size}.*) -val total_size : string -> int -> int +val total_size : bytes -> int -> int (** See {!Marshal.header_size}.*) diff --git a/stdlib/nativeint.mli b/stdlib/nativeint.mli index eb2dde2cf..3dce1b6c4 100644 --- a/stdlib/nativeint.mli +++ b/stdlib/nativeint.mli @@ -16,8 +16,8 @@ This module provides operations on the type [nativeint] of signed 32-bit integers (on 32-bit platforms) or signed 64-bit integers (on 64-bit platforms). - This integer type has exactly the same width as that of a [long] - integer type in the C compiler. All arithmetic operations over + This integer type has exactly the same width as that of a + pointer type in the C compiler. All arithmetic operations over [nativeint] are taken modulo 2{^32} or 2{^64} depending on the word size of the architecture. diff --git a/stdlib/obj.ml b/stdlib/obj.ml index a6f11586e..ac9695cdb 100644 --- a/stdlib/obj.ml +++ b/stdlib/obj.ml @@ -33,9 +33,9 @@ external truncate : t -> int -> unit = "caml_obj_truncate" external add_offset : t -> Int32.t -> t = "caml_obj_add_offset" let marshal (obj : t) = - Marshal.to_string obj [] + Marshal.to_bytes obj [] let unmarshal str pos = - (Marshal.from_string str pos, pos + Marshal.total_size str pos) + (Marshal.from_bytes str pos, pos + Marshal.total_size str pos) let lazy_tag = 246 let closure_tag = 247 @@ -56,3 +56,33 @@ let final_tag = custom_tag let int_tag = 1000 let out_of_heap_tag = 1001 let unaligned_tag = 1002 + +let extension_slot x = + let x = repr x in + let slot = + if (is_block x) && (tag x) <> object_tag && (size x) >= 1 then field x 0 + else x + in + let name = + if (is_block slot) && (tag slot) = object_tag then field slot 0 + else raise Not_found + in + if (tag name) = string_tag then slot + else raise Not_found + +let extension_name x = + try + let slot = extension_slot x in + (obj (field slot 0) : string) + with Not_found -> invalid_arg "Obj.extension_name" + +let extension_id x = + try + let slot = extension_slot x in + (obj (field slot 1) : int) + with Not_found -> invalid_arg "Obj.extension_id" + +let extension_slot x = + try + extension_slot x + with Not_found -> invalid_arg "Obj.extension_slot" diff --git a/stdlib/obj.mli b/stdlib/obj.mli index 9a5bd721d..08b8a4f64 100644 --- a/stdlib/obj.mli +++ b/stdlib/obj.mli @@ -43,18 +43,22 @@ val infix_tag : int val forward_tag : int val no_scan_tag : int val abstract_tag : int -val string_tag : int +val string_tag : int (* both [string] and [bytes] *) val double_tag : int val double_array_tag : int val custom_tag : int -val final_tag : int (* DEPRECATED *) +val final_tag : int [@@ocaml.deprecated] val int_tag : int val out_of_heap_tag : int val unaligned_tag : int (* should never happen @since 3.11.0 *) +val extension_name : 'a -> string +val extension_id : 'a -> int +val extension_slot : 'a -> t + (** The following two functions are deprecated. Use module {!Marshal} instead. *) -val marshal : t -> string -val unmarshal : string -> int -> t * int +val marshal : t -> bytes [@@ocaml.deprecated] +val unmarshal : bytes -> int -> t * int [@@ocaml.deprecated] diff --git a/stdlib/parsing.ml b/stdlib/parsing.ml index 762128244..47e151e1b 100644 --- a/stdlib/parsing.ml +++ b/stdlib/parsing.ml @@ -84,10 +84,10 @@ external set_trace: bool -> bool = "caml_set_parser_trace" let env = - { s_stack = Array.create 100 0; - v_stack = Array.create 100 (Obj.repr ()); - symb_start_stack = Array.create 100 dummy_pos; - symb_end_stack = Array.create 100 dummy_pos; + { s_stack = Array.make 100 0; + v_stack = Array.make 100 (Obj.repr ()); + symb_start_stack = Array.make 100 dummy_pos; + symb_end_stack = Array.make 100 dummy_pos; stacksize = 100; stackbase = 0; curr_char = 0; @@ -104,10 +104,10 @@ let env = let grow_stacks() = let oldsize = env.stacksize in let newsize = oldsize * 2 in - let new_s = Array.create newsize 0 - and new_v = Array.create newsize (Obj.repr ()) - and new_start = Array.create newsize dummy_pos - and new_end = Array.create newsize dummy_pos in + let new_s = Array.make newsize 0 + and new_v = Array.make newsize (Obj.repr ()) + and new_start = Array.make newsize dummy_pos + and new_end = Array.make newsize dummy_pos in Array.blit env.s_stack 0 new_s 0 oldsize; env.s_stack <- new_s; Array.blit env.v_stack 0 new_v 0 oldsize; diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml index e4a07c3de..6b7165206 100644 --- a/stdlib/pervasives.ml +++ b/stdlib/pervasives.ml @@ -37,6 +37,18 @@ exception Exit external ( |> ) : 'a -> ('a -> 'b) -> 'b = "%revapply" external ( @@ ) : ('a -> 'b) -> 'a -> 'b = "%apply" +(* Debugging *) + +external __LOC__ : string = "%loc_LOC" +external __FILE__ : string = "%loc_FILE" +external __LINE__ : int = "%loc_LINE" +external __MODULE__ : string = "%loc_MODULE" +external __POS__ : string * int * int * int = "%loc_POS" + +external __LOC_OF__ : 'a -> string * 'a = "%loc_LOC" +external __LINE_OF__ : 'a -> int * 'a = "%loc_LINE" +external __POS_OF__ : 'a -> (string * int * int * int) * 'a = "%loc_POS" + (* Comparisons *) external ( = ) : 'a -> 'a -> bool = "%equal" @@ -69,7 +81,7 @@ external succ : int -> int = "%succint" external pred : int -> int = "%predint" external ( + ) : int -> int -> int = "%addint" external ( - ) : int -> int -> int = "%subint" -external ( * ) : int -> int -> int = "%mulint" +external ( * ) : int -> int -> int = "%mulint" external ( / ) : int -> int -> int = "%divint" external ( mod ) : int -> int -> int = "%modint" @@ -150,19 +162,24 @@ type fpclass = | FP_nan external classify_float : float -> fpclass = "caml_classify_float" -(* String operations -- more in module String *) +(* String and byte sequence operations -- more in modules String and Bytes *) external string_length : string -> int = "%string_length" -external string_create : int -> string = "caml_create_string" -external string_blit : string -> int -> string -> int -> int -> unit +external bytes_length : bytes -> int = "%string_length" +external bytes_create : int -> bytes = "caml_create_string" +external string_blit : string -> int -> bytes -> int -> int -> unit = "caml_blit_string" "noalloc" +external bytes_blit : bytes -> int -> bytes -> int -> int -> unit + = "caml_blit_string" "noalloc" +external bytes_unsafe_to_string : bytes -> string = "%identity" +external bytes_unsafe_of_string : string -> bytes = "%identity" let ( ^ ) s1 s2 = let l1 = string_length s1 and l2 = string_length s2 in - let s = string_create (l1 + l2) in + let s = bytes_create (l1 + l2) in string_blit s1 0 s 0 l1; string_blit s2 0 s l1 l2; - s + bytes_unsafe_to_string s (* Character operations -- more in module Char *) @@ -180,6 +197,15 @@ external ignore : 'a -> unit = "%ignore" external fst : 'a * 'b -> 'a = "%field0" external snd : 'a * 'b -> 'b = "%field1" +(* References *) + +type 'a ref = { mutable contents : 'a } +external ref : 'a -> 'a ref = "%makemutable" +external ( ! ) : 'a ref -> 'a = "%field0" +external ( := ) : 'a ref -> 'a -> unit = "%setfield0" +external incr : int ref -> unit = "%incr" +external decr : int ref -> unit = "%decr" + (* String conversion functions *) external format_int : string -> int -> string = "caml_format_int" @@ -196,16 +222,13 @@ let string_of_int n = format_int "%d" n external int_of_string : string -> int = "caml_int_of_string" - -module String = struct - external get : string -> int -> char = "%string_safe_get" -end +external string_get : string -> int -> char = "%string_safe_get" let valid_float_lexem s = let l = string_length s in let rec loop i = if i >= l then s ^ "." else - match s.[i] with + match string_get s i with | '0' .. '9' | '-' -> loop (i + 1) | _ -> s in @@ -265,19 +288,29 @@ let flush_all () = | a :: l -> (try flush a with _ -> ()); iter l in iter (out_channels_list ()) -external unsafe_output : out_channel -> string -> int -> int -> unit +external unsafe_output : out_channel -> bytes -> int -> int -> unit = "caml_ml_output" +external unsafe_output_string : out_channel -> string -> int -> int -> unit + = "caml_ml_output" external output_char : out_channel -> char -> unit = "caml_ml_output_char" +let output_bytes oc s = + unsafe_output oc s 0 (bytes_length s) + let output_string oc s = - unsafe_output oc s 0 (string_length s) + unsafe_output_string oc s 0 (string_length s) let output oc s ofs len = - if ofs < 0 || len < 0 || ofs > string_length s - len + if ofs < 0 || len < 0 || ofs > bytes_length s - len then invalid_arg "output" else unsafe_output oc s ofs len +let output_substring oc s ofs len = + if ofs < 0 || len < 0 || ofs > string_length s - len + then invalid_arg "output_substring" + else unsafe_output_string oc s ofs len + external output_byte : out_channel -> int -> unit = "caml_ml_output_char" external output_binary_int : out_channel -> int -> unit = "caml_ml_output_int" @@ -309,11 +342,11 @@ let open_in_bin name = external input_char : in_channel -> char = "caml_ml_input_char" -external unsafe_input : in_channel -> string -> int -> int -> int +external unsafe_input : in_channel -> bytes -> int -> int -> int = "caml_ml_input" let input ic s ofs len = - if ofs < 0 || len < 0 || ofs > string_length s - len + if ofs < 0 || len < 0 || ofs > bytes_length s - len then invalid_arg "input" else unsafe_input ic s ofs len @@ -326,39 +359,44 @@ let rec unsafe_really_input ic s ofs len = end let really_input ic s ofs len = - if ofs < 0 || len < 0 || ofs > string_length s - len + if ofs < 0 || len < 0 || ofs > bytes_length s - len then invalid_arg "really_input" else unsafe_really_input ic s ofs len +let really_input_string ic len = + let s = bytes_create len in + really_input ic s 0 len; + bytes_unsafe_to_string s + external input_scan_line : in_channel -> int = "caml_ml_input_scan_line" let input_line chan = let rec build_result buf pos = function [] -> buf | hd :: tl -> - let len = string_length hd in - string_blit hd 0 buf (pos - len) len; + let len = bytes_length hd in + bytes_blit hd 0 buf (pos - len) len; build_result buf (pos - len) tl in let rec scan accu len = let n = input_scan_line chan in if n = 0 then begin (* n = 0: we are at EOF *) match accu with [] -> raise End_of_file - | _ -> build_result (string_create len) len accu + | _ -> build_result (bytes_create len) len accu end else if n > 0 then begin (* n > 0: newline found in buffer *) - let res = string_create (n - 1) in + let res = bytes_create (n - 1) in ignore (unsafe_input chan res 0 (n - 1)); ignore (input_char chan); (* skip the newline *) match accu with [] -> res | _ -> let len = len + n - 1 in - build_result (string_create len) len (res :: accu) + build_result (bytes_create len) len (res :: accu) end else begin (* n < 0: newline not found *) - let beg = string_create (-n) in + let beg = bytes_create (-n) in ignore(unsafe_input chan beg 0 (-n)); scan (beg :: accu) (len - n) end - in scan [] 0 + in bytes_unsafe_to_string (scan [] 0) external input_byte : in_channel -> int = "caml_ml_input_char" external input_binary_int : in_channel -> int = "caml_ml_input_int" @@ -375,6 +413,7 @@ external set_binary_mode_in : in_channel -> bool -> unit let print_char c = output_char stdout c let print_string s = output_string stdout s +let print_bytes s = output_bytes stdout s let print_int i = output_string stdout (string_of_int i) let print_float f = output_string stdout (string_of_float f) let print_endline s = @@ -385,6 +424,7 @@ let print_newline () = output_char stdout '\n'; flush stdout let prerr_char c = output_char stderr c let prerr_string s = output_string stderr s +let prerr_bytes s = output_bytes stderr s let prerr_int i = output_string stderr (string_of_int i) let prerr_float f = output_string stderr (string_of_float f) let prerr_endline s = @@ -410,43 +450,26 @@ module LargeFile = external in_channel_length : in_channel -> int64 = "caml_ml_channel_size_64" end -(* References *) +(* Formats *) -type 'a ref = { mutable contents : 'a } -external ref : 'a -> 'a ref = "%makemutable" -external ( ! ) : 'a ref -> 'a = "%field0" -external ( := ) : 'a ref -> 'a -> unit = "%setfield0" -external incr : int ref -> unit = "%incr" -external decr : int ref -> unit = "%decr" +type ('a, 'b, 'c, 'd, 'e, 'f) format6 + = ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 + = Format of ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.fmt + * string -(* Formats *) type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6 type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4 +let string_of_format (Format (fmt, str)) = str + external format_of_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" -external format_to_string : - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string = "%identity" -external string_to_format : - string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" - -let (( ^^ ) : - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - ('f, 'b, 'c, 'e, 'g, 'h) format6 -> - ('a, 'b, 'c, 'd, 'g, 'h) format6) = - fun fmt1 fmt2 -> - string_to_format (format_to_string fmt1 ^ "%," ^ format_to_string fmt2) -;; - -let string_of_format fmt = - let s = format_to_string fmt in - let l = string_length s in - let r = string_create l in - string_blit s 0 r 0 l; - r +let (^^) (Format (fmt1, str1)) (Format (fmt2, str2)) = + Format (CamlinternalFormatBasics.concat_fmt fmt1 fmt2, + str1 ^ "%," ^ str2) (* Miscellaneous *) diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli index 0a2e4af6a..d471a4ebb 100644 --- a/stdlib/pervasives.mli +++ b/stdlib/pervasives.mli @@ -14,8 +14,8 @@ (** The initially opened module. This module provides the basic operations over the built-in types - (numbers, booleans, strings, exceptions, references, lists, arrays, - input-output channels, ...). + (numbers, booleans, byte sequences, strings, exceptions, references, + lists, arrays, input-output channels, ...). This module is automatically opened at the beginning of each compilation. All components of this module can therefore be referred by their short @@ -68,7 +68,7 @@ external ( <= ) : 'a -> 'a -> bool = "%lessequal" external ( >= ) : 'a -> 'a -> bool = "%greaterequal" (** Structural ordering functions. These functions coincide with - the usual orderings over integers, characters, strings + the usual orderings over integers, characters, strings, byte sequences and floating-point numbers, and extend them to a total ordering over all types. The ordering is compatible with [( = )]. As in the case @@ -107,7 +107,7 @@ val max : 'a -> 'a -> 'a external ( == ) : 'a -> 'a -> bool = "%eq" (** [e1 == e2] tests for physical equality of [e1] and [e2]. - On mutable types such as references, arrays, strings, records with + On mutable types such as references, arrays, byte sequences, records with mutable fields and objects with mutable instance variables, [e1 == e2] is true if and only if physical modification of [e1] also affects [e2]. @@ -130,6 +130,7 @@ external ( && ) : bool -> bool -> bool = "%sequand" [e2] is not evaluated at all. *) external ( & ) : bool -> bool -> bool = "%sequand" + [@@ocaml.deprecated] (** @deprecated {!Pervasives.( && )} should be used instead. *) external ( || ) : bool -> bool -> bool = "%sequor" @@ -138,8 +139,47 @@ external ( || ) : bool -> bool -> bool = "%sequor" [e2] is not evaluated at all. *) external ( or ) : bool -> bool -> bool = "%sequor" + [@@ocaml.deprecated] (** @deprecated {!Pervasives.( || )} should be used instead.*) +(** {6 Debugging} *) + +external __LOC__ : string = "%loc_LOC" +(** [__LOC__] returns the location at which this expression appears in + the file currently being parsed by the compiler, with the standard + error format of OCaml: "File %S, line %d, characters %d-%d" *) +external __FILE__ : string = "%loc_FILE" +(** [__FILE__] returns the name of the file currently being + parsed by the compiler. *) +external __LINE__ : int = "%loc_LINE" +(** [__LINE__] returns the line number at which this expression + appears in the file currently being parsed by the compiler. *) +external __MODULE__ : string = "%loc_MODULE" +(** [__MODULE__] returns the module name of the file being + parsed by the compiler. *) +external __POS__ : string * int * int * int = "%loc_POS" +(** [__POS__] returns a tuple [(file,lnum,cnum,enum)], corresponding + to the location at which this expression appears in the file + currently being parsed by the compiler. [file] is the current + filename, [lnum] the line number, [cnum] the character position in + the line and [enum] the last character position in the line. *) + +external __LOC_OF__ : 'a -> string * 'a = "%loc_LOC" +(** [__LOC_OF__ expr] returns a pair [(loc, expr)] where [loc] is the + location of [expr] in the file currently being parsed by the + compiler, with the standard error format of OCaml: "File %S, line + %d, characters %d-%d" *) +external __LINE_OF__ : 'a -> int * 'a = "%loc_LINE" +(** [__LINE__ expr] returns a pair [(line, expr)], where [line] is the + line number at which the expression [expr] appears in the file + currently being parsed by the compiler. *) +external __POS_OF__ : 'a -> (string * int * int * int) * 'a = "%loc_POS" +(** [__POS_OF__ expr] returns a pair [(expr,loc)], where [loc] is a + tuple [(file,lnum,cnum,enum)] corresponding to the location at + which the expression [expr] appears in the file currently being + parsed by the compiler. [file] is the current filename, [lnum] the + line number, [cnum] the character position in the line and [enum] + the last character position in the line. *) (** {6 Composition operators} *) @@ -553,6 +593,9 @@ val print_char : char -> unit val print_string : string -> unit (** Print a string on standard output. *) +val print_bytes : bytes -> unit +(** Print a byte sequence on standard output. *) + val print_int : int -> unit (** Print an integer, in decimal, on standard output. *) @@ -577,6 +620,9 @@ val prerr_char : char -> unit val prerr_string : string -> unit (** Print a string on standard error. *) +val prerr_bytes : bytes -> unit +(** Print a byte sequence on standard error. *) + val prerr_int : int -> unit (** Print an integer, in decimal, on standard error. *) @@ -584,8 +630,8 @@ val prerr_float : float -> unit (** Print a floating-point number, in decimal, on standard error. *) val prerr_endline : string -> unit -(** Print a string, followed by a newline character on standard error - and flush standard error. *) +(** Print a string, followed by a newline character on standard + error and flush standard error. *) val prerr_newline : unit -> unit (** Print a newline character on standard error, and flush @@ -661,11 +707,18 @@ val output_char : out_channel -> char -> unit val output_string : out_channel -> string -> unit (** Write the string on the given output channel. *) -val output : out_channel -> string -> int -> int -> unit -(** [output oc buf pos len] writes [len] characters from string [buf], +val output_bytes : out_channel -> bytes -> unit +(** Write the byte sequence on the given output channel. *) + +val output : out_channel -> bytes -> int -> int -> unit +(** [output oc buf pos len] writes [len] characters from byte sequence [buf], starting at offset [pos], to the given output channel [oc]. Raise [Invalid_argument "output"] if [pos] and [len] do not - designate a valid substring of [buf]. *) + designate a valid range of [buf]. *) + +val output_substring : out_channel -> string -> int -> int -> unit +(** Same as [output] but take a string as argument instead of + a byte sequence. *) val output_byte : out_channel -> int -> unit (** Write one 8-bit integer (as the single character with that code) @@ -756,9 +809,9 @@ val input_line : in_channel -> string Raise [End_of_file] if the end of the file is reached at the beginning of line. *) -val input : in_channel -> string -> int -> int -> int +val input : in_channel -> bytes -> int -> int -> int (** [input ic buf pos len] reads up to [len] characters from - the given channel [ic], storing them in string [buf], starting at + the given channel [ic], storing them in byte sequence [buf], starting at character number [pos]. It returns the actual number of characters read, between 0 and [len] (inclusive). @@ -771,15 +824,21 @@ val input : in_channel -> string -> int -> int -> int if desired. (See also {!Pervasives.really_input} for reading exactly [len] characters.) Exception [Invalid_argument "input"] is raised if [pos] and [len] - do not designate a valid substring of [buf]. *) + do not designate a valid range of [buf]. *) -val really_input : in_channel -> string -> int -> int -> unit +val really_input : in_channel -> bytes -> int -> int -> unit (** [really_input ic buf pos len] reads [len] characters from channel [ic], - storing them in string [buf], starting at character number [pos]. + storing them in byte sequence [buf], starting at character number [pos]. Raise [End_of_file] if the end of file is reached before [len] characters have been read. Raise [Invalid_argument "really_input"] if - [pos] and [len] do not designate a valid substring of [buf]. *) + [pos] and [len] do not designate a valid range of [buf]. *) + +val really_input_string : in_channel -> int -> string +(** [really_input_string ic len] reads [len] characters from channel [ic] + and returns them in a new string. + Raise [End_of_file] if the end of file is reached before [len] + characters have been read. *) val input_byte : in_channel -> int (** Same as {!Pervasives.input_char}, but return the 8-bit integer representing @@ -910,7 +969,7 @@ external decr : int ref -> unit = "%decr" *) (** Format strings have a general and highly polymorphic type - [('a, 'b, 'c, 'd, 'e, 'f) format6]. Type [format6] is built in. + [('a, 'b, 'c, 'd, 'e, 'f) format6]. The two simplified types, [format] and [format4] below are included for backward compatibility with earlier releases of OCaml. @@ -949,6 +1008,10 @@ external decr : int ref -> unit = "%decr" for the [scanf]-style functions, it is typically the result type of the receiver function. *) + +type ('a, 'b, 'c, 'd, 'e, 'f) format6 = + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 + type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6 type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4 @@ -1003,6 +1066,6 @@ val at_exit : (unit -> unit) -> unit val valid_float_lexem : string -> string -val unsafe_really_input : in_channel -> string -> int -> int -> unit +val unsafe_really_input : in_channel -> bytes -> int -> int -> unit val do_at_exit : unit -> unit diff --git a/stdlib/printexc.ml b/stdlib/printexc.ml index 9f20c7b46..4ebb84cea 100644 --- a/stdlib/printexc.ml +++ b/stdlib/printexc.ml @@ -82,12 +82,13 @@ let catch fct arg = eprintf "Uncaught exception: %s\n" (to_string x); exit 2 -type raw_backtrace +type raw_backtrace_slot +type raw_backtrace = raw_backtrace_slot array external get_raw_backtrace: unit -> raw_backtrace = "caml_get_exception_raw_backtrace" -type loc_info = +type backtrace_slot = | Known_location of bool (* is_raise *) * string (* filename *) * int (* line number *) @@ -98,29 +99,27 @@ type loc_info = (* to avoid warning *) let _ = [Known_location (false, "", 0, 0, 0); Unknown_location false] -type backtrace = loc_info array +external convert_raw_backtrace_slot: + raw_backtrace_slot -> backtrace_slot = "caml_convert_raw_backtrace_slot" -external convert_raw_backtrace: - raw_backtrace -> backtrace option = "caml_convert_raw_backtrace" +let convert_raw_backtrace rbckt = + try Some (Array.map convert_raw_backtrace_slot rbckt) + with Failure _ -> None -let format_loc_info pos li = - let is_raise = - match li with - | Known_location(is_raise, _, _, _, _) -> is_raise - | Unknown_location(is_raise) -> is_raise in - let info = +let format_backtrace_slot pos slot = + let info is_raise = if is_raise then if pos = 0 then "Raised at" else "Re-raised at" else if pos = 0 then "Raised by primitive operation at" else "Called from" in - match li with + match slot with + | Unknown_location true -> (* compiler-inserted re-raise, skipped *) None + | Unknown_location false -> + Some (sprintf "%s unknown location" (info false)) | Known_location(is_raise, filename, lineno, startchar, endchar) -> - sprintf "%s file \"%s\", line %d, characters %d-%d" - info filename lineno startchar endchar - | Unknown_location(is_raise) -> - sprintf "%s unknown location" - info + Some (sprintf "%s file \"%s\", line %d, characters %d-%d" + (info is_raise) filename lineno startchar endchar) let print_exception_backtrace outchan backtrace = match backtrace with @@ -129,8 +128,9 @@ let print_exception_backtrace outchan backtrace = "(Program not linked with -g, cannot print stack backtrace)\n" | Some a -> for i = 0 to Array.length a - 1 do - if a.(i) <> Unknown_location true then - fprintf outchan "%s\n" (format_loc_info i a.(i)) + match format_backtrace_slot i a.(i) with + | None -> () + | Some str -> fprintf outchan "%s\n" str done let print_raw_backtrace outchan raw_backtrace = @@ -147,20 +147,70 @@ let backtrace_to_string backtrace = | Some a -> let b = Buffer.create 1024 in for i = 0 to Array.length a - 1 do - if a.(i) <> Unknown_location true then - bprintf b "%s\n" (format_loc_info i a.(i)) + match format_backtrace_slot i a.(i) with + | None -> () + | Some str -> bprintf b "%s\n" str done; Buffer.contents b let raw_backtrace_to_string raw_backtrace = backtrace_to_string (convert_raw_backtrace raw_backtrace) +let backtrace_slot_is_raise = function + | Known_location(is_raise, _, _, _, _) -> is_raise + | Unknown_location(is_raise) -> is_raise + +type location = { + filename : string; + line_number : int; + start_char : int; + end_char : int; +} + +let backtrace_slot_location = function + | Unknown_location _ -> None + | Known_location(_is_raise, filename, line_number, + start_char, end_char) -> + Some { + filename; + line_number; + start_char; + end_char; + } + +let backtrace_slots raw_backtrace = + (* The documentation of this function guarantees that Some is + returned only if a part of the trace is usable. This gives us + a bit more work than just convert_raw_backtrace, but it makes the + API more user-friendly -- otherwise most users would have to + reimplement the "Program not linked with -g, sorry" logic + themselves. *) + match convert_raw_backtrace raw_backtrace with + | None -> None + | Some backtrace -> + let usable_slot = function + | Unknown_location _ -> false + | Known_location _ -> true in + let rec exists_usable = function + | (-1) -> false + | i -> usable_slot backtrace.(i) || exists_usable (i - 1) in + if exists_usable (Array.length backtrace - 1) + then Some backtrace + else None + +module Slot = struct + type t = backtrace_slot + let format = format_backtrace_slot + let is_raise = backtrace_slot_is_raise + let location = backtrace_slot_location +end + +let raw_backtrace_length bckt = Array.length bckt +let get_raw_backtrace_slot bckt i = Array.get bckt i + (* confusingly named: returns the *string* corresponding to the global current backtrace *) let get_backtrace () = - (* we could use the caml_get_exception_backtrace primitive here, but - we hope to deprecate it so it's better to just compose the - raw stuff *) backtrace_to_string (convert_raw_backtrace (get_raw_backtrace ())) external record_backtrace: bool -> unit = "caml_record_backtrace" @@ -169,10 +219,8 @@ external backtrace_status: unit -> bool = "caml_backtrace_status" let register_printer fn = printers := fn :: !printers - external get_callstack: int -> raw_backtrace = "caml_get_current_callstack" - let exn_slot x = let x = Obj.repr x in if Obj.tag x = 0 then Obj.field x 0 else x @@ -184,3 +232,64 @@ let exn_slot_id x = let exn_slot_name x = let slot = exn_slot x in (Obj.obj (Obj.field slot 0) : string) + + +let uncaught_exception_handler = ref None + +let set_uncaught_exception_handler fn = uncaught_exception_handler := Some fn + +let empty_backtrace : raw_backtrace = Obj.obj (Obj.new_block Obj.abstract_tag 0) + +let try_get_raw_backtrace () = + try + get_raw_backtrace () + with _ (* Out_of_memory? *) -> + empty_backtrace + +let handle_uncaught_exception' exn debugger_in_use = + try + (* Get the backtrace now, in case one of the [at_exit] function + destroys it. *) + let raw_backtrace = + if debugger_in_use (* Same test as in [byterun/printexc.c] *) then + empty_backtrace + else + try_get_raw_backtrace () + in + (try Pervasives.do_at_exit () with _ -> ()); + match !uncaught_exception_handler with + | None -> + eprintf "Fatal error: exception %s\n" (to_string exn); + print_raw_backtrace stderr raw_backtrace; + flush stderr + | Some handler -> + try + handler exn raw_backtrace + with exn' -> + let raw_backtrace' = try_get_raw_backtrace () in + eprintf "Fatal error: exception %s\n" (to_string exn); + print_raw_backtrace stderr raw_backtrace; + eprintf "Fatal error in uncaught exception handler: exception %s\n" + (to_string exn'); + print_raw_backtrace stderr raw_backtrace'; + flush stderr + with + | Out_of_memory -> + prerr_endline + "Fatal error: out of memory in uncaught exception handler" + +(* This function is called by [caml_fatal_uncaught_exception] in + [byterun/printexc.c] which expects no exception is raised. *) +let handle_uncaught_exception exn debugger_in_use = + try + handle_uncaught_exception' exn debugger_in_use + with _ -> + (* There is not much we can do at this point *) + () + +external register_named_value : string -> 'a -> unit + = "caml_register_named_value" + +let () = + register_named_value "Printexc.handle_uncaught_exception" + handle_uncaught_exception diff --git a/stdlib/printexc.mli b/stdlib/printexc.mli index c378d9cb3..6bffe174c 100644 --- a/stdlib/printexc.mli +++ b/stdlib/printexc.mli @@ -86,25 +86,46 @@ val register_printer: (exn -> string option) -> unit (** {6 Raw backtraces} *) type raw_backtrace - -(** The abstract type [backtrace] stores exception backtraces in +(** The abstract type [raw_backtrace] stores a backtrace in a low-level format, instead of directly exposing them as string as the [get_backtrace()] function does. This allows delaying the formatting of backtraces to when they are - actually printed, which might be useful if you record more + actually printed, which may be useful if you record more backtraces than you print. + + Raw backtraces cannot be marshalled. If you need marshalling, you + should use the array returned by the [backtrace_slots] function of + the next section. + + @since 4.01.0 *) val get_raw_backtrace: unit -> raw_backtrace +(** [Printexc.get_raw_backtrace ()] returns the same exception + backtrace that [Printexc.print_backtrace] would print, but in + a raw format. + + @since 4.01.0 +*) + val print_raw_backtrace: out_channel -> raw_backtrace -> unit +(** Print a raw backtrace in the same format + [Printexc.print_backtrace] uses. + + @since 4.01.0 +*) + val raw_backtrace_to_string: raw_backtrace -> string +(** Return a string from a raw backtrace, in the same format + [Printexc.get_backtrace] uses. + @since 4.01.0 +*) (** {6 Current call stack} *) val get_callstack: int -> raw_backtrace - (** [Printexc.get_callstack n] returns a description of the top of the call stack on the current program point (for the current thread), with at most [n] entries. (Note: this function is not related to @@ -113,6 +134,138 @@ val get_callstack: int -> raw_backtrace @since 4.01.0 *) +(** {6 Uncaught exceptions} *) + +val set_uncaught_exception_handler: (exn -> raw_backtrace -> unit) -> unit +(** [Printexc.set_uncaught_exception_handler fn] registers [fn] as the handler + for uncaught exceptions. The default handler prints the exception and + backtrace on standard error output. + + Note that when [fn] is called all the functions registered with + {!Pervasives.at_exit} have already been called. Because of this you must + make sure any output channel [fn] writes on is flushed. + + If [fn] raises an exception, it is ignored. + + @since 4.02.0 +*) + + +(** {6 Manipulation of backtrace information} + + Those function allow to traverse the slots of a raw backtrace, + extract information from them in a programmer-friendly format. +*) + +type backtrace_slot +(** The abstract type [backtrace_slot] represents a single slot of + a backtrace. + + @since 4.02 +*) + +val backtrace_slots : raw_backtrace -> backtrace_slot array option +(** Returns the slots of a raw backtrace, or [None] if none of them + contain useful information. + + In the return array, the slot at index [0] corresponds to the most + recent function call, raise, or primitive [get_backtrace] call in + the trace. + + Some possible reasons for returning [None] are as follow: + - none of the slots in the trace come from modules compiled with + debug information ([-g]) + - the program is a bytecode program that has not been linked with + debug information enabled ([ocamlc -g]) +*) + +type location = { + filename : string; + line_number : int; + start_char : int; + end_char : int; +} +(** The type of location information found in backtraces. [start_char] + and [end_char] are positions relative to the beginning of the + line. + + @since 4.02 +*) + +module Slot : sig + type t = backtrace_slot + + val is_raise : t -> bool + (** [is_raise slot] is [true] when [slot] refers to a raising + point in the code, and [false] when it comes from a simple + function call. + + @since 4.02 + *) + + val location : t -> location option + (** [location slot] returns the location information of the slot, + if available, and [None] otherwise. + + Some possible reasons for failing to return a location are as follow: + - the slot corresponds to a compiler-inserted raise + - the slot corresponds to a part of the program that has not been + compiled with debug information ([-g]) + + @since 4.02 + *) + + val format : int -> t -> string option + (** [format pos slot] returns the string representation of [slot] as + [raw_backtrace_to_string] would format it, assuming it is the + [pos]-th element of the backtrace: the [0]-th element is + pretty-printed differently than the others. + + Whole-backtrace printing functions also skip some uninformative + slots; in that case, [format pos slot] returns [None]. + + @since 4.02 + *) +end + + +(** {6 Raw backtrace slots} *) + +type raw_backtrace_slot +(** This type allows direct access to raw backtrace slots, without any + conversion in an OCaml-usable data-structure. Being + process-specific, they must absolutely not be marshalled, and are + unsafe to use for this reason (marshalling them may not fail, but + un-marshalling and using the result will result in + undefined behavior). + + Elements of this type can still be compared and hashed: when two + elements are equal, then they represent the same source location + (the converse is not necessarily true in presence of inlining, + for example). +*) + +val raw_backtrace_length : raw_backtrace -> int +(** [raw_backtrace_length bckt] returns the number of slots in the + backtrace [bckt]. + + @since 4.02 +*) + +val get_raw_backtrace_slot : raw_backtrace -> int -> raw_backtrace_slot +(** [get_slot bckt pos] returns the slot in position [pos] in the + backtrace [bckt]. + + @since 4.02 +*) + +val convert_raw_backtrace_slot : raw_backtrace_slot -> backtrace_slot +(** Extracts the user-friendly [backtrace_slot] from a low-level + [raw_backtrace_slot]. + + @since 4.02 +*) + (** {6 Exception slots} *) @@ -130,5 +283,3 @@ val exn_slot_name: exn -> string @since 4.02.0 *) - - diff --git a/stdlib/printf.ml b/stdlib/printf.ml index 380169204..1152429f9 100644 --- a/stdlib/printf.ml +++ b/stdlib/printf.ml @@ -11,728 +11,29 @@ (* *) (***********************************************************************) -external format_float: string -> float -> string - = "caml_format_float" -external format_int: string -> int -> string - = "caml_format_int" -external format_int32: string -> int32 -> string - = "caml_int32_format" -external format_nativeint: string -> nativeint -> string - = "caml_nativeint_format" -external format_int64: string -> int64 -> string - = "caml_int64_format" - -module Sformat = struct - - type index;; - - external unsafe_index_of_int : int -> index = "%identity" - ;; - let index_of_int i = - if i >= 0 then unsafe_index_of_int i - else failwith ("Sformat.index_of_int: negative argument " ^ string_of_int i) - ;; - external int_of_index : index -> int = "%identity" - ;; - - let add_int_index i idx = index_of_int (i + int_of_index idx);; - let succ_index = add_int_index 1;; - (* Literal position are one-based (hence pred p instead of p). *) - let index_of_literal_position p = index_of_int (pred p);; - - external length : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int - = "%string_length" - ;; - external get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_safe_get" - ;; - external unsafe_get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_unsafe_get" - ;; - external unsafe_to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string - = "%identity" - ;; - let sub fmt idx len = - String.sub (unsafe_to_string fmt) (int_of_index idx) len - ;; - let to_string fmt = sub fmt (unsafe_index_of_int 0) (length fmt) - ;; - -end -;; - -let bad_conversion sfmt i c = - invalid_arg - ("Printf: bad conversion %" ^ String.make 1 c ^ ", at char number " ^ - string_of_int i ^ " in format string \'" ^ sfmt ^ "\'") -;; - -let bad_conversion_format fmt i c = - bad_conversion (Sformat.to_string fmt) i c -;; - -let incomplete_format fmt = - invalid_arg - ("Printf: premature end of format string \'" ^ - Sformat.to_string fmt ^ "\'") -;; - -(* Parses a string conversion to return the specified length and the - padding direction. *) -let parse_string_conversion sfmt = - let rec parse neg i = - if i >= String.length sfmt then (0, neg) else - match String.unsafe_get sfmt i with - | '1'..'9' -> - (int_of_string - (String.sub sfmt i (String.length sfmt - i - 1)), - neg) - | '-' -> - parse true (succ i) - | _ -> - parse neg (succ i) in - try parse false 1 with - | Failure _ -> bad_conversion sfmt 0 's' -;; - -(* Pad a (sub) string into a blank string of length [p], - on the right if [neg] is true, on the left otherwise. *) -let pad_string pad_char p neg s i len = - if p = len && i = 0 then s else - if p <= len then String.sub s i len else - let res = String.make p pad_char in - if neg - then String.blit s i res 0 len - else String.blit s i res (p - len) len; - res -;; - -(* Format a string given a %s format, e.g. %40s or %-20s. - To do ?: ignore other flags (#, +, etc). *) -let format_string sfmt s = - let (p, neg) = parse_string_conversion sfmt in - pad_string ' ' p neg s 0 (String.length s) -;; - -(* Extract a format string out of [fmt] between [start] and [stop] inclusive. - ['*'] in the format are replaced by integers taken from the [widths] list. - [extract_format] returns a string which is the string representation of - the resulting format string. *) -let extract_format fmt start stop widths = - let skip_positional_spec start = - match Sformat.unsafe_get fmt start with - | '0'..'9' -> - let rec skip_int_literal i = - match Sformat.unsafe_get fmt i with - | '0'..'9' -> skip_int_literal (succ i) - | '$' -> succ i - | _ -> start in - skip_int_literal (succ start) - | _ -> start in - let start = skip_positional_spec (succ start) in - let b = Buffer.create (stop - start + 10) in - Buffer.add_char b '%'; - let rec fill_format i widths = - if i <= stop then - match (Sformat.unsafe_get fmt i, widths) with - | ('*', h :: t) -> - Buffer.add_string b (string_of_int h); - let i = skip_positional_spec (succ i) in - fill_format i t - | ('*', []) -> - assert false (* Should not happen since this is ill-typed. *) - | (c, _) -> - Buffer.add_char b c; - fill_format (succ i) widths in - fill_format start (List.rev widths); - Buffer.contents b -;; - -let extract_format_int conv fmt start stop widths = - let sfmt = extract_format fmt start stop widths in - match conv with - | 'n' | 'N' -> - sfmt.[String.length sfmt - 1] <- 'u'; - sfmt - | _ -> sfmt -;; - -let extract_format_float conv fmt start stop widths = - let sfmt = extract_format fmt start stop widths in - match conv with - | 'F' -> - sfmt.[String.length sfmt - 1] <- 'g'; - sfmt - | _ -> sfmt -;; - -(* Returns the position of the next character following the meta format - string, starting from position [i], inside a given format [fmt]. - According to the character [conv], the meta format string is - enclosed by the delimiters %{ and %} (when [conv = '{']) or %( and - %) (when [conv = '(']). Hence, [sub_format] returns the index of - the character following the [')'] or ['}'] that ends the meta format, - according to the character [conv]. *) -let sub_format incomplete_format bad_conversion_format conv fmt i = - let len = Sformat.length fmt in - let rec sub_fmt c i = - let close = if c = '(' then ')' else (* '{' *) '}' in - let rec sub j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | '%' -> sub_sub (succ j) - | _ -> sub (succ j) - and sub_sub j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | '(' | '{' as c -> - let j = sub_fmt c (succ j) in - sub (succ j) - | '}' | ')' as c -> - if c = close then succ j else bad_conversion_format fmt i c - | _ -> sub (succ j) in - sub i in - sub_fmt conv i -;; - -let sub_format_for_printf conv = - sub_format incomplete_format bad_conversion_format conv -;; - -let iter_on_format_args fmt add_conv add_char = - - let lim = Sformat.length fmt - 1 in - - let rec scan_flags skip i = - if i > lim then incomplete_format fmt else - match Sformat.unsafe_get fmt i with - | '*' -> scan_flags skip (add_conv skip i 'i') - (* | '$' -> scan_flags skip (succ i) *** PR#4321 *) - | '#' | '-' | ' ' | '+' -> scan_flags skip (succ i) - | '_' -> scan_flags true (succ i) - | '0'..'9' - | '.' -> scan_flags skip (succ i) - | _ -> scan_conv skip i - and scan_conv skip i = - if i > lim then incomplete_format fmt else - match Sformat.unsafe_get fmt i with - | '%' | '@' | '!' | ',' -> succ i - | 's' | 'S' | '[' -> add_conv skip i 's' - | 'c' | 'C' -> add_conv skip i 'c' - | 'd' | 'i' |'o' | 'u' | 'x' | 'X' | 'N' -> add_conv skip i 'i' - | 'f' | 'e' | 'E' | 'g' | 'G' | 'F' -> add_conv skip i 'f' - | 'B' | 'b' -> add_conv skip i 'B' - | 'a' | 'r' | 't' as conv -> add_conv skip i conv - | 'l' | 'n' | 'L' as conv -> - let j = succ i in - if j > lim then add_conv skip i 'i' else begin - match Sformat.get fmt j with - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' -> - add_char (add_conv skip i conv) 'i' - | _ -> add_conv skip i 'i' end - | '{' as conv -> - (* Just get a regular argument, skipping the specification. *) - let i = add_conv skip i conv in - (* To go on, find the index of the next char after the meta format. *) - let j = sub_format_for_printf conv fmt i in - (* Add the meta specification to the summary anyway. *) - let rec loop i = - if i < j - 2 then loop (add_char i (Sformat.get fmt i)) in - loop i; - (* Go on, starting at the closing brace to properly close the meta - specification in the summary. *) - scan_conv skip (j - 1) - | '(' as conv -> - (* Use the static format argument specification instead of - the runtime format argument value: they must have the same type - anyway. *) - scan_fmt (add_conv skip i conv) - | '}' | ')' as conv -> add_conv skip i conv - | conv -> bad_conversion_format fmt i conv - - and scan_fmt i = - if i < lim then - if Sformat.get fmt i = '%' - then scan_fmt (scan_flags false (succ i)) - else scan_fmt (succ i) - else i in - - ignore (scan_fmt 0) -;; - -(* Returns a string that summarizes the typing information that a given - format string contains. - For instance, [summarize_format_type "A number %d\n"] is "%i". - It also checks the well-formedness of the format string. *) -let summarize_format_type fmt = - let len = Sformat.length fmt in - let b = Buffer.create len in - let add_char i c = Buffer.add_char b c; succ i in - let add_conv skip i c = - if skip then Buffer.add_string b "%_" else Buffer.add_char b '%'; - add_char i c in - iter_on_format_args fmt add_conv add_char; - Buffer.contents b -;; - -module Ac = struct - type ac = { - mutable ac_rglr : int; - mutable ac_skip : int; - mutable ac_rdrs : int; - } -end -;; - -open Ac;; - -(* Computes the number of arguments of a format (including the flag - arguments if any). *) -let ac_of_format fmt = - let ac = { ac_rglr = 0; ac_skip = 0; ac_rdrs = 0; } in - let incr_ac skip c = - let inc = if c = 'a' then 2 else 1 in - if c = 'r' then ac.ac_rdrs <- ac.ac_rdrs + 1; - if skip - then ac.ac_skip <- ac.ac_skip + inc - else ac.ac_rglr <- ac.ac_rglr + inc in - let add_conv skip i c = - (* Just finishing a meta format: no additional argument to record. *) - if c <> ')' && c <> '}' then incr_ac skip c; - succ i - and add_char i _ = succ i in - - iter_on_format_args fmt add_conv add_char; - ac -;; - -let count_printing_arguments_of_format fmt = - let ac = ac_of_format fmt in - (* For printing, only the regular arguments have to be counted. *) - ac.ac_rglr -;; - -let list_iter_i f l = - let rec loop i = function - | [] -> () - | [x] -> f i x (* Tail calling [f] *) - | x :: xs -> f i x; loop (succ i) xs in - loop 0 l -;; - -(* 'Abstracting' version of kprintf: returns a (curried) function that - will print when totally applied. - Note: in the following, we are careful not to be badly caught - by the compiler optimizations for the representation of arrays. *) -let kapr kpr fmt = - match count_printing_arguments_of_format fmt with - | 0 -> kpr fmt [||] - | 1 -> Obj.magic (fun x -> - let a = Array.make 1 (Obj.repr 0) in - a.(0) <- x; - kpr fmt a) - | 2 -> Obj.magic (fun x y -> - let a = Array.make 2 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; - kpr fmt a) - | 3 -> Obj.magic (fun x y z -> - let a = Array.make 3 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - kpr fmt a) - | 4 -> Obj.magic (fun x y z t -> - let a = Array.make 4 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - a.(3) <- t; - kpr fmt a) - | 5 -> Obj.magic (fun x y z t u -> - let a = Array.make 5 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - a.(3) <- t; a.(4) <- u; - kpr fmt a) - | 6 -> Obj.magic (fun x y z t u v -> - let a = Array.make 6 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - a.(3) <- t; a.(4) <- u; a.(5) <- v; - kpr fmt a) - | nargs -> - let rec loop i args = - if i >= nargs then - let a = Array.make nargs (Obj.repr 0) in - list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args; - kpr fmt a - else Obj.magic (fun x -> loop (succ i) (x :: args)) in - loop 0 [] -;; - -type positional_specification = - | Spec_none | Spec_index of Sformat.index -;; - -(* To scan an optional positional parameter specification, - i.e. an integer followed by a [$]. - - Calling [got_spec] with appropriate arguments, we 'return' a positional - specification and an index to go on scanning the [fmt] format at hand. - - Note that this is optimized for the regular case, i.e. no positional - parameter, since in this case we juste 'return' the constant - [Spec_none]; in case we have a positional parameter, we 'return' a - [Spec_index] [positional_specification] which is a bit more costly. - - Note also that we do not support [*$] specifications, since this would - lead to type checking problems: a [*$] positional specification means - 'take the next argument to [printf] (which must be an integer value)', - name this integer value $n$; [*$] now designates parameter $n$. - - Unfortunately, the type of a parameter specified via a [*$] positional - specification should be the type of the corresponding argument to - [printf], hence this should be the type of the $n$-th argument to [printf] - with $n$ being the {\em value} of the integer argument defining [*]; we - clearly cannot statically guess the value of this parameter in the general - case. Put it another way: this means type dependency, which is completely - out of scope of the OCaml type algebra. *) - -let scan_positional_spec fmt got_spec i = - match Sformat.unsafe_get fmt i with - | '0'..'9' as d -> - let rec get_int_literal accu j = - match Sformat.unsafe_get fmt j with - | '0'..'9' as d -> - get_int_literal (10 * accu + (int_of_char d - 48)) (succ j) - | '$' -> - if accu = 0 then - failwith "printf: bad positional specification (0)." else - got_spec (Spec_index (Sformat.index_of_literal_position accu)) (succ j) - (* Not a positional specification: tell so the caller, and go back to - scanning the format from the original [i] position we were called at - first. *) - | _ -> got_spec Spec_none i in - get_int_literal (int_of_char d - 48) (succ i) - (* No positional specification: tell so the caller, and go back to scanning - the format from the original [i] position. *) - | _ -> got_spec Spec_none i -;; - -(* Get the index of the next argument to printf, according to the given - positional specification. *) -let next_index spec n = - match spec with - | Spec_none -> Sformat.succ_index n - | Spec_index _ -> n -;; - -(* Get the index of the actual argument to printf, according to its - optional positional specification. *) -let get_index spec n = - match spec with - | Spec_none -> n - | Spec_index p -> p -;; - -(* Format a float argument as a valid OCaml lexeme. *) -let format_float_lexeme = - - (* To be revised: this procedure should be a unique loop that performs the - validity check and the string lexeme modification at the same time. - Otherwise, it is too difficult to handle the strange padding facilities - given by printf. Let alone handling the correct widths indication, - knowing that we have sometime to add a '.' at the end of the result! - *) - - let make_valid_float_lexeme s = - (* Check if s is already a valid lexeme: - in this case do nothing, - otherwise turn s into a valid OCaml lexeme. *) - let l = String.length s in - let rec valid_float_loop i = - if i >= l then s ^ "." else - match s.[i] with - (* Sure, this is already a valid float lexeme. *) - | '.' | 'e' | 'E' -> s - | _ -> valid_float_loop (i + 1) in - - valid_float_loop 0 in - - (fun sfmt x -> - match classify_float x with - | FP_normal | FP_subnormal | FP_zero -> - make_valid_float_lexeme (format_float sfmt x) - | FP_infinite -> - if x < 0.0 then "neg_infinity" else "infinity" - | FP_nan -> - "nan") -;; - -(* Decode a format string and act on it. - [fmt] is the [printf] format string, and [pos] points to a [%] character in - the format string. - After consuming the appropriate number of arguments and formatting - them, one of the following five continuations described below is called: - - - [cont_s] for outputting a string - (arguments: arg num, string, next pos) - - [cont_a] for performing a %a action - (arguments: arg num, fn, arg, next pos) - - [cont_t] for performing a %t action - (arguments: arg num, fn, next pos) - - [cont_f] for performing a flush action - (arguments: arg num, next pos) - - [cont_m] for performing a %( action - (arguments: arg num, sfmt, next pos) - - "arg num" is the index in array [args] of the next argument to [printf]. - "next pos" is the position in [fmt] of the first character following - the %conversion specification in [fmt]. *) - -(* Note: here, rather than test explicitly against [Sformat.length fmt] - to detect the end of the format, we use [Sformat.unsafe_get] and - rely on the fact that we'll get a "null" character if we access - one past the end of the string. These "null" characters are then - caught by the [_ -> bad_conversion] clauses below. - Don't do this at home, kids. *) -let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = - - let get_arg spec n = - Obj.magic (args.(Sformat.int_of_index (get_index spec n))) in - - let rec scan_positional n widths i = - let got_spec spec i = scan_flags spec n widths i in - scan_positional_spec fmt got_spec i - - and scan_flags spec n widths i = - match Sformat.unsafe_get fmt i with - | '*' -> - let got_spec wspec i = - let (width : int) = get_arg wspec n in - scan_flags spec (next_index wspec n) (width :: widths) i in - scan_positional_spec fmt got_spec (succ i) - | '0'..'9' - | '.' | '#' | '-' | ' ' | '+' -> scan_flags spec n widths (succ i) - | _ -> scan_conv spec n widths i - - and scan_conv spec n widths i = - match Sformat.unsafe_get fmt i with - | '%' | '@' as c -> - cont_s n (String.make 1 c) (succ i) - | '!' -> cont_f n (succ i) - | ',' -> cont_s n "" (succ i) - | 's' | 'S' as conv -> - let (x : string) = get_arg spec n in - let x = if conv = 's' then x else "\"" ^ String.escaped x ^ "\"" in - let s = - (* Optimize for common case %s *) - if i = succ pos then x else - format_string (extract_format fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | '[' as conv -> - bad_conversion_format fmt i conv - | 'c' | 'C' as conv -> - let (x : char) = get_arg spec n in - let s = - if conv = 'c' then String.make 1 x else "'" ^ Char.escaped x ^ "'" in - cont_s (next_index spec n) s (succ i) - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' | 'N' as conv -> - let (x : int) = get_arg spec n in - let s = - format_int (extract_format_int conv fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | 'f' | 'e' | 'E' | 'g' | 'G' -> - let (x : float) = get_arg spec n in - let s = format_float (extract_format fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | 'F' as conv -> - let (x : float) = get_arg spec n in - let s = - format_float_lexeme - (if widths = [] - then "%.12g" - else extract_format_float conv fmt pos i widths) - x in - cont_s (next_index spec n) s (succ i) - | 'B' | 'b' -> - let (x : bool) = get_arg spec n in - cont_s (next_index spec n) (string_of_bool x) (succ i) - | 'a' -> - let printer = get_arg spec n in - (* If the printer spec is Spec_none, go on as usual. - If the printer spec is Spec_index p, - printer's argument spec is Spec_index (succ_index p). *) - let n = Sformat.succ_index (get_index spec n) in - let arg = get_arg Spec_none n in - cont_a (next_index spec n) printer arg (succ i) - | 'r' as conv -> - bad_conversion_format fmt i conv - | 't' -> - let printer = get_arg spec n in - cont_t (next_index spec n) printer (succ i) - | 'l' | 'n' | 'L' as conv -> - begin match Sformat.unsafe_get fmt (succ i) with - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' -> - let i = succ i in - let s = - match conv with - | 'l' -> - let (x : int32) = get_arg spec n in - format_int32 (extract_format fmt pos i widths) x - | 'n' -> - let (x : nativeint) = get_arg spec n in - format_nativeint (extract_format fmt pos i widths) x - | _ -> - let (x : int64) = get_arg spec n in - format_int64 (extract_format fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | _ -> - let (x : int) = get_arg spec n in - let s = format_int (extract_format_int 'n' fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - end - | '{' | '(' as conv (* ')' '}' *) -> - let (xf : ('a, 'b, 'c, 'd, 'e, 'f) format6) = get_arg spec n in - let i = succ i in - let i = sub_format_for_printf conv fmt i in - if conv = '{' (* '}' *) then - (* Just print the format argument as a specification. *) - cont_s - (next_index spec n) - (summarize_format_type xf) - i else - (* Use the format argument instead of the format specification. *) - cont_m (next_index spec n) xf i - | (* '(' *) ')' -> - cont_s n "" (succ i) - | conv -> - bad_conversion_format fmt i conv in - - scan_positional n [] (succ pos) -;; - -let mkprintf to_s get_out outc outs flush k fmt = - - (* [out] is global to this definition of [pr], and must be shared by all its - recursive calls (if any). *) - let out = get_out fmt in - let outc c = outc out c in - let outs s = outs out s in - - let rec pr k n fmt v = - - let len = Sformat.length fmt in - - let rec doprn n i = - if i >= len then Obj.magic (k out) else - match Sformat.unsafe_get fmt i with - | '%' -> scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m - | c -> outc c; doprn n (succ i) - - and cont_s n s i = - outs s; doprn n i - and cont_a n printer arg i = - if to_s then - outs ((Obj.magic printer : unit -> _ -> string) () arg) - else - printer out arg; - doprn n i - and cont_t n printer i = - if to_s then - outs ((Obj.magic printer : unit -> string) ()) - else - printer out; - doprn n i - and cont_f n i = - flush out; doprn n i - and cont_m n xf i = - let m = - Sformat.add_int_index - (count_printing_arguments_of_format xf) n in - pr (Obj.magic (fun _ -> doprn m i)) n xf v in - - doprn n 0 in - - let kpr = pr k (Sformat.index_of_int 0) in - - kapr kpr fmt -;; - -(************************************************************** - - Defining [fprintf] and various flavors of [fprintf]. - - **************************************************************) - -let kfprintf k oc = - mkprintf false (fun _ -> oc) output_char output_string flush k -;; -let ikfprintf k oc = kapr (fun _ _ -> Obj.magic (k oc));; - -let fprintf oc = kfprintf ignore oc;; -let ifprintf oc = ikfprintf ignore oc;; -let printf fmt = fprintf stdout fmt;; -let eprintf fmt = fprintf stderr fmt;; - -let kbprintf k b = - mkprintf false (fun _ -> b) Buffer.add_char Buffer.add_string ignore k -;; -let bprintf b = kbprintf ignore b;; - -let get_buff fmt = - let len = 2 * Sformat.length fmt in - Buffer.create len -;; - -let get_contents b = - let s = Buffer.contents b in - Buffer.clear b; - s -;; - -let get_cont k b = k (get_contents b);; - -let ksprintf k = - mkprintf true get_buff Buffer.add_char Buffer.add_string ignore (get_cont k) -;; - -let sprintf fmt = ksprintf (fun s -> s) fmt;; - -(************************************************************** - - Deprecated stuff. - - **************************************************************) - -let kprintf = ksprintf;; - -(* For OCaml system internal use only: needed to implement modules [Format] - and [Scanf]. *) - -module CamlinternalPr = struct - - module Sformat = Sformat;; - - module Tformat = struct - - type ac = - Ac.ac = { - mutable ac_rglr : int; - mutable ac_skip : int; - mutable ac_rdrs : int; - } - ;; - - let ac_of_format = ac_of_format;; - - let count_printing_arguments_of_format = - count_printing_arguments_of_format;; - - let sub_format = sub_format;; - - let summarize_format_type = summarize_format_type;; - - let scan_format = scan_format;; - - let kapr = kapr;; - - end - ;; - -end -;; +open CamlinternalFormatBasics +open CamlinternalFormat + +let kfprintf k o (Format (fmt, _)) = + make_printf (fun o acc -> output_acc o acc; k o) o End_of_acc fmt +let kbprintf k b (Format (fmt, _)) = + make_printf (fun b acc -> bufput_acc b acc; k b) b End_of_acc fmt +let ikfprintf k oc (Format (fmt, _)) = + make_printf (fun oc _ -> k oc) oc End_of_acc fmt + +let fprintf oc fmt = kfprintf ignore oc fmt +let bprintf b fmt = kbprintf ignore b fmt +let ifprintf oc fmt = ikfprintf ignore oc fmt +let printf fmt = fprintf stdout fmt +let eprintf fmt = fprintf stderr fmt + +let ksprintf k (Format (fmt, _)) = + let k' () acc = + let buf = Buffer.create 64 in + strput_acc buf acc; + k (Buffer.contents buf) in + make_printf k' () End_of_acc fmt + +let sprintf fmt = ksprintf (fun s -> s) fmt + +let kprintf = ksprintf diff --git a/stdlib/printf.mli b/stdlib/printf.mli index a75a64181..21e28159a 100644 --- a/stdlib/printf.mli +++ b/stdlib/printf.mli @@ -163,76 +163,5 @@ val kbprintf : (Buffer.t -> 'a) -> Buffer.t -> (** Deprecated *) -val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;; +val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b (** A deprecated synonym for [ksprintf]. *) - -(**/**) - -(* The following is for system use only. Do not call directly. *) - -module CamlinternalPr : sig - - module Sformat : sig - type index;; - - val index_of_int : int -> index;; - external int_of_index : index -> int = "%identity";; - external unsafe_index_of_int : int -> index = "%identity";; - - val succ_index : index -> index;; - val add_int_index : int -> index -> index;; - - val sub : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> index -> int -> string;; - val to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string;; - external length : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int - = "%string_length";; - external get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_safe_get";; - external unsafe_to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string - = "%identity";; - external unsafe_get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_unsafe_get";; - - end;; - - module Tformat : sig - - type ac = { - mutable ac_rglr : int; - mutable ac_skip : int; - mutable ac_rdrs : int; - };; - - val ac_of_format : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> ac;; - val count_printing_arguments_of_format : - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int;; - - val sub_format : - (('a, 'b, 'c, 'd, 'e, 'f) format6 -> int) -> - (('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char -> int) -> - char -> - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - int -> - int - - val summarize_format_type : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string - - val scan_format : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - 'g array -> - Sformat.index -> - int -> - (Sformat.index -> string -> int -> 'h) -> - (Sformat.index -> 'i -> 'j -> int -> 'h) -> - (Sformat.index -> 'k -> int -> 'h) -> - (Sformat.index -> int -> 'h) -> - (Sformat.index -> ('l, 'm, 'n, 'o, 'p, 'q) format6 -> int -> 'h) -> - 'h - - val kapr : - (('a, 'b, 'c, 'd, 'e, 'f) format6 -> Obj.t array -> 'g) -> - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - 'g - - end;; - -end;; diff --git a/stdlib/queue.mli b/stdlib/queue.mli index 55e898832..5cd1136e4 100644 --- a/stdlib/queue.mli +++ b/stdlib/queue.mli @@ -16,7 +16,7 @@ This module implements queues (FIFOs), with in-place modification. {b Warning} This module is not thread-safe: each {!Queue.t} value - must be protected from concurrent access (e.g. with a {!Mutex.t}). + must be protected from concurrent access (e.g. with a [Mutex.t]). Failure to do so can lead to a crash. *) diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 8f694fd3a..2a63ced9a 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -11,6 +11,19 @@ (* *) (***********************************************************************) +open CamlinternalFormatBasics +open CamlinternalFormat + +(* alias to avoid warning for ambiguity between + Pervasives.format6 + and CamlinternalFormatBasics.format6 + + (the former is in fact an alias for the latter, + but the ambiguity warning doesn't care) +*) +type ('a, 'b, 'c, 'd, 'e, 'f) format6 = + ('a, 'b, 'c, 'd, 'e, 'f) Pervasives.format6 + (* The run-time library for scanners. *) (* Scanning buffers. *) @@ -318,17 +331,17 @@ module Scanning : SCANNING = struct let from_ic scan_close_ic iname ic = let len = !file_buffer_size in - let buf = String.create len in + let buf = Bytes.create len in let i = ref 0 in let lim = ref 0 in let eof = ref false in let next () = - if !i < !lim then begin let c = buf.[!i] in incr i; c end else + if !i < !lim then begin let c = Bytes.get buf !i in incr i; c end else if !eof then raise End_of_file else begin lim := input ic buf 0 len; if !lim = 0 then begin eof := true; scan_close_ic ic end else begin i := 1; - buf.[0] + Bytes.get buf 0 end end in create iname next @@ -402,11 +415,6 @@ end type ('a, 'b, 'c, 'd) scanner = ('a, Scanning.in_channel, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c -;; - -external string_to_format : - string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" -;; (* Reporting errors. *) exception Scan_failure of string;; @@ -429,33 +437,6 @@ let bad_end_of_input message = (Printf.sprintf "scanning of %s failed: \ premature end of file occurred before end of token" message) -;; - -let int_of_width_opt = function - | None -> max_int - | Some width -> width -;; - -let int_of_prec_opt = function - | None -> max_int - | Some prec -> prec -;; - -module Sformat = Printf.CamlinternalPr.Sformat;; -module Tformat = Printf.CamlinternalPr.Tformat;; - -let bad_conversion fmt i c = - invalid_arg - (Printf.sprintf - "scanf: bad conversion %%%C, at char number %i \ - in format string \'%s\'" c i (Sformat.to_string fmt)) -;; - -let incomplete_format fmt = - invalid_arg - (Printf.sprintf "scanf: premature end of format string \'%s\'" - (Sformat.to_string fmt)) -;; let bad_float () = bad_input "no dot or exponent part found in float token" @@ -467,19 +448,15 @@ let character_mismatch_err c ci = let character_mismatch c ci = bad_input (character_mismatch_err c ci) -;; - -let format_mismatch_err fmt1 fmt2 = - Printf.sprintf - "format read \'%s\' does not match specification \'%s\'" fmt1 fmt2 -;; - -let format_mismatch fmt1 fmt2 = bad_input (format_mismatch_err fmt1 fmt2);; -(* Checking that 2 format strings are type compatible. *) -let compatible_format_type fmt1 fmt2 = - Tformat.summarize_format_type (string_to_format fmt1) = - Tformat.summarize_format_type (string_to_format fmt2);; +let rec skip_whites ib = + let c = Scanning.peek_char ib in + if not (Scanning.eof ib) then begin + match c with + | ' ' | '\t' | '\n' | '\r' -> + Scanning.invalidate_current_char ib; skip_whites ib + | _ -> () + end (* Checking that [c] is indeed in the input, then skips it. In this case, the character [c] has been explicitly specified in the @@ -496,28 +473,13 @@ let compatible_format_type fmt1 fmt2 = We are also careful to treat "\r\n" in the input as an end of line marker: it always matches a '\n' specification in the input format string. *) let rec check_char ib c = - let ci = Scanning.checked_peek_char ib in - if ci = c then Scanning.invalidate_current_char ib else begin - match ci with - | '\r' when c = '\n' -> - Scanning.invalidate_current_char ib; check_char ib '\n' - | _ -> character_mismatch c ci - end -;; - -(* Checks that the current char is indeed one of the stopper characters, - then skips it. - Be careful that if ib has no more character this procedure should - just do nothing (since %s@c defaults to the entire rest of the - buffer, when no character c can be found in the input). *) -let ignore_stoppers stps ib = - if stps <> [] && not (Scanning.eof ib) then - let ci = Scanning.peek_char ib in - if List.memq ci stps then Scanning.invalidate_current_char ib else - let sr = String.concat "" (List.map (String.make 1) stps) in - bad_input - (Printf.sprintf "looking for one of range %S, found %C" sr ci) -;; + if c = ' ' then skip_whites ib else + let ci = Scanning.checked_peek_char ib in + if ci = c then Scanning.invalidate_current_char ib else + match ci with + | '\r' when c = '\n' -> + Scanning.invalidate_current_char ib; check_char ib '\n' + | _ -> character_mismatch c ci (* Extracting tokens from the output token buffer. *) @@ -701,7 +663,7 @@ let scan_optionally_signed_int width ib = scan_unsigned_int width ib ;; -let scan_int_conv conv width _prec ib = +let scan_int_conv conv width ib = match conv with | 'b' -> scan_binary_int width ib | 'd' -> scan_optionally_signed_decimal_int width ib @@ -791,7 +753,7 @@ let scan_float width precision ib = scan_exp_part width ib, precision ;; -let scan_Float width precision ib = +let scan_caml_float width precision ib = let width = scan_optionally_signed_decimal_int width ib in if width = 0 then bad_float () else let c = Scanning.peek_char ib in @@ -805,12 +767,11 @@ let scan_Float width precision ib = | 'e' | 'E' -> scan_exp_part width ib | _ -> bad_float () -;; (* Scan a regular string: stops when encountering a space, if no scanning indication has been given; - otherwise, stops when encountering one of the characters in the scanning - indication list [stp]. + otherwise, stops when encountering the characters in the scanning + indication [stp]. It also stops at end of file or when the maximum number of characters has been read.*) let scan_string stp width ib = @@ -818,12 +779,14 @@ let scan_string stp width ib = if width = 0 then width else let c = Scanning.peek_char ib in if Scanning.eof ib then width else - if stp = [] then - match c with - | ' ' | '\t' | '\n' | '\r' -> width - | c -> loop (Scanning.store_char width ib c) else - if List.memq c stp then Scanning.skip_char width ib else - loop (Scanning.store_char width ib c) in + match stp with + | Some c' when c = c' -> Scanning.skip_char width ib + | Some _ -> loop (Scanning.store_char width ib c) + | None -> + match c with + | ' ' | '\t' | '\n' | '\r' -> width + | _ -> loop (Scanning.store_char width ib c) + in loop width ;; @@ -925,7 +888,7 @@ let scan_backslash_char width ib = ;; (* Scan a character (an OCaml token). *) -let scan_Char width ib = +let scan_caml_char width ib = let rec find_start width = match Scanning.checked_peek_char ib with @@ -948,7 +911,7 @@ let scan_Char width ib = ;; (* Scan a delimited string (an OCaml token). *) -let scan_String width ib = +let scan_caml_string width ib = let rec find_start width = match Scanning.checked_peek_char ib with @@ -981,8 +944,7 @@ let scan_String width ib = ;; (* Scan a boolean (an OCaml token). *) -let scan_bool width ib = - if width < 4 then bad_token_length "a boolean" else +let scan_bool ib = let c = Scanning.checked_peek_char ib in let m = match c with @@ -991,560 +953,384 @@ let scan_bool width ib = | c -> bad_input (Printf.sprintf "the character %C cannot start a boolean" c) in - scan_string [] (min width m) ib -;; - -(* Reading char sets in %[...] conversions. *) -type char_set = - | Pos_set of string (* Positive (regular) set. *) - | Neg_set of string (* Negative (complementary) set. *) -;; - - -(* Char sets are read as sub-strings in the format string. *) -let scan_range fmt j = - - let len = Sformat.length fmt in - - let buffer = Buffer.create len in - - let rec scan_closing j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | ']' -> j, Buffer.contents buffer - | '%' -> - let j = j + 1 in - if j >= len then incomplete_format fmt else - begin match Sformat.get fmt j with - | '%' | '@' as c -> - Buffer.add_char buffer c; - scan_closing (j + 1) - | c -> bad_conversion fmt j c - end - | c -> - Buffer.add_char buffer c; - scan_closing (j + 1) in - - let scan_first_pos j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | ']' as c -> - Buffer.add_char buffer c; - scan_closing (j + 1) - | _ -> scan_closing j in - - let scan_first_neg j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | '^' -> - let j = j + 1 in - let k, char_set = scan_first_pos j in - k, Neg_set char_set - | _ -> - let k, char_set = scan_first_pos j in - k, Pos_set char_set in - - scan_first_neg j -;; - -(* Char sets are now represented as bit vectors that are represented as - byte strings. *) - -(* Bit manipulations into bytes. *) -let set_bit_of_byte byte idx b = - (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx))) -;; - -let get_bit_of_byte byte idx = (byte lsr idx) land 1;; - -(* Bit manipulations in vectors of bytes represented as strings. *) -let set_bit_of_range r c b = - let idx = c land 0x7 in - let ydx = c lsr 3 in - let byte = r.[ydx] in - r.[ydx] <- char_of_int (set_bit_of_byte (int_of_char byte) idx b) -;; - -let get_bit_of_range r c = - let idx = c land 0x7 in - let ydx = c lsr 3 in - let byte = r.[ydx] in - get_bit_of_byte (int_of_char byte) idx -;; - -(* Char sets represented as bit vectors represented as fixed length byte - strings. *) -(* Create a full or empty set of chars. *) -let make_range bit = - let c = char_of_int (if bit = 0 then 0 else 0xFF) in - String.make 32 c -;; - -(* Test if a char belongs to a set of chars. *) -let get_char_in_range r c = get_bit_of_range r (int_of_char c);; - -let bit_not b = (lnot b) land 1;; - -(* Build the bit vector corresponding to the set of characters - that belongs to the string argument [set]. - (In the [Scanf] module [set] is always a sub-string of the format.) *) -let make_char_bit_vect bit set = - let r = make_range (bit_not bit) in - let lim = String.length set - 1 in - let rec loop bit rp i = - if i <= lim then - match set.[i] with - | '-' when rp -> - (* if i = 0 then rp is false (since the initial call is - loop bit false 0). Hence i >= 1 and the following is safe. *) - let c1 = set.[i - 1] in - let i = succ i in - if i > lim then loop bit false (i - 1) else - let c2 = set.[i] in - for j = int_of_char c1 to int_of_char c2 do - set_bit_of_range r j bit done; - loop bit false (succ i) - | _ -> - set_bit_of_range r (int_of_char set.[i]) bit; - loop bit true (succ i) in - loop bit false 0; - r -;; - -(* Compute the predicate on chars corresponding to a char set. *) -let make_predicate bit set stp = - let r = make_char_bit_vect bit set in - List.iter - (fun c -> set_bit_of_range r (int_of_char c) (bit_not bit)) stp; - (fun c -> get_char_in_range r c) -;; - -let make_setp stp char_set = - match char_set with - | Pos_set set -> - begin match String.length set with - | 0 -> (fun _ -> 0) - | 1 -> - let p = set.[0] in - (fun c -> if c == p then 1 else 0) - | 2 -> - let p1 = set.[0] and p2 = set.[1] in - (fun c -> if c == p1 || c == p2 then 1 else 0) - | 3 -> - let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in - if p2 = '-' then make_predicate 1 set stp else - (fun c -> if c == p1 || c == p2 || c == p3 then 1 else 0) - | _ -> make_predicate 1 set stp - end - | Neg_set set -> - begin match String.length set with - | 0 -> (fun _ -> 1) - | 1 -> - let p = set.[0] in - (fun c -> if c != p then 1 else 0) - | 2 -> - let p1 = set.[0] and p2 = set.[1] in - (fun c -> if c != p1 && c != p2 then 1 else 0) - | 3 -> - let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in - if p2 = '-' then make_predicate 0 set stp else - (fun c -> if c != p1 && c != p2 && c != p3 then 1 else 0) - | _ -> make_predicate 0 set stp - end -;; - -let setp_table = Hashtbl.create 7;; - -let add_setp stp char_set setp = - let char_set_tbl = - try Hashtbl.find setp_table char_set with - | Not_found -> - let char_set_tbl = Hashtbl.create 3 in - Hashtbl.add setp_table char_set char_set_tbl; - char_set_tbl in - Hashtbl.add char_set_tbl stp setp -;; + scan_string None m ib -let find_setp stp char_set = - try Hashtbl.find (Hashtbl.find setp_table char_set) stp with - | Not_found -> - let setp = make_setp stp char_set in - add_setp stp char_set setp; - setp -;; - -let scan_chars_in_char_set stp char_set width ib = - let rec loop_pos1 cp1 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c == cp1 - then loop_pos1 cp1 (Scanning.store_char width ib c) - else width - and loop_pos2 cp1 cp2 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c == cp1 || c == cp2 - then loop_pos2 cp1 cp2 (Scanning.store_char width ib c) - else width - and loop_pos3 cp1 cp2 cp3 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c == cp1 || c == cp2 || c == cp3 - then loop_pos3 cp1 cp2 cp3 (Scanning.store_char width ib c) - else width - and loop_neg1 cp1 width = - if width = 0 then width else +(* Scan a string containing elements in char_set and terminated by scan_indic + if provided. *) +let scan_chars_in_char_set char_set scan_indic width ib = + let rec scan_chars i stp = let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c != cp1 - then loop_neg1 cp1 (Scanning.store_char width ib c) - else width - and loop_neg2 cp1 cp2 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c != cp1 && c != cp2 - then loop_neg2 cp1 cp2 (Scanning.store_char width ib c) - else width - and loop_neg3 cp1 cp2 cp3 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c != cp1 && c != cp2 && c != cp3 - then loop_neg3 cp1 cp2 cp3 (Scanning.store_char width ib c) - else width - and loop setp width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if setp c == 1 - then loop setp (Scanning.store_char width ib c) - else width in - - let width = - match char_set with - | Pos_set set -> - begin match String.length set with - | 0 -> loop (fun _ -> 0) width - | 1 -> loop_pos1 set.[0] width - | 2 -> loop_pos2 set.[0] set.[1] width - | 3 when set.[1] != '-' -> loop_pos3 set.[0] set.[1] set.[2] width - | _ -> loop (find_setp stp char_set) width end - | Neg_set set -> - begin match String.length set with - | 0 -> loop (fun _ -> 1) width - | 1 -> loop_neg1 set.[0] width - | 2 -> loop_neg2 set.[0] set.[1] width - | 3 when set.[1] != '-' -> loop_neg3 set.[0] set.[1] set.[2] width - | _ -> loop (find_setp stp char_set) width end in - ignore_stoppers stp ib; - width -;; - -let get_count t ib = - match t with - | 'l' -> Scanning.line_count ib - | 'n' -> Scanning.char_count ib - | _ -> Scanning.token_count ib -;; - -let rec skip_whites ib = - let c = Scanning.peek_char ib in - if not (Scanning.eof ib) then begin - match c with - | ' ' | '\t' | '\n' | '\r' -> - Scanning.invalidate_current_char ib; skip_whites ib - | _ -> () - end -;; + if i > 0 && not (Scanning.eof ib) && is_in_char_set char_set c && + int_of_char c <> stp then + let _ = Scanning.store_char max_int ib c in + scan_chars (i - 1) stp; + in + match scan_indic with + | None -> scan_chars width (-1); + | Some c -> + scan_chars width (int_of_char c); + if not (Scanning.eof ib) then + let ci = Scanning.peek_char ib in + if c = ci then Scanning.invalidate_current_char ib + else character_mismatch c ci (* The global error report function for [Scanf]. *) let scanf_bad_input ib = function | Scan_failure s | Failure s -> let i = Scanning.char_count ib in - bad_input (Printf.sprintf "scanf: bad input at char number %i: \'%s\'" i s) + bad_input (Printf.sprintf "scanf: bad input at char number %i: %S" i s) | x -> raise x -;; -let list_iter_i f l = - let rec loop i = function - | [] -> () - | [x] -> f i x (* Tail calling [f] *) - | x :: xs -> f i x; loop (succ i) xs in - loop 0 l -;; +(* Get the content of a counter from an input buffer. *) +let get_counter ib counter = match counter with + | Line_counter -> Scanning.line_count ib + | Char_counter -> Scanning.char_count ib + | Token_counter -> Scanning.token_count ib -let ascanf sc fmt = - let ac = Tformat.ac_of_format fmt in - match ac.Tformat.ac_rdrs with - | 0 -> - Obj.magic (fun f -> sc fmt [||] f) - | 1 -> - Obj.magic (fun x f -> sc fmt [| Obj.repr x |] f) - | 2 -> - Obj.magic (fun x y f -> sc fmt [| Obj.repr x; Obj.repr y; |] f) - | 3 -> - Obj.magic - (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f) - | nargs -> - let rec loop i args = - if i >= nargs then - let a = Array.make nargs (Obj.repr 0) in - list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args; - Obj.magic (fun f -> sc fmt a f) - else Obj.magic (fun x -> loop (succ i) (x :: args)) in - loop 0 [] -;; - -(* The [scan_format] main scanning function. - It takes as arguments: - - an input buffer [ib] from which to read characters, - - an error handling function [ef], - - a format [fmt] that specifies what to read in the input, - - a vector of user's defined readers [rv], - - and a function [f] to pass the tokens read to. - - Then [scan_format] scans the format and the input buffer in parallel to - find out tokens as specified by the format; when it finds one token, it - converts it as specified, remembers the converted value as a future - argument to the function [f], and continues scanning. - - If the entire scanning succeeds (i.e. the format string has been - exhausted and the buffer has provided tokens according to the - format string), [f] is applied to the tokens read. - - If the scanning or some conversion fails, the main scanning function - aborts and applies the scanning buffer and a string that explains - the error to the error handling function [ef] (the error continuation). *) - -let scan_format ib ef fmt rv f = - - let limr = Array.length rv - 1 in - - let return v = Obj.magic v () in - let delay f x () = f x in - let stack f = delay (return f) in - let no_stack f _x = f in - - let rec scan fmt = - - let lim = Sformat.length fmt - 1 in - - let rec scan_fmt ir f i = - if i > lim then ir, f else - match Sformat.unsafe_get fmt i with - | '%' -> scan_skip ir f (succ i) - | ' ' -> skip_whites ib; scan_fmt ir f (succ i) - | c -> check_char ib c; scan_fmt ir f (succ i) - - and scan_skip ir f i = - if i > lim then ir, f else - match Sformat.get fmt i with - | '_' -> scan_limits true ir f (succ i) - | _ -> scan_limits false ir f i - - and scan_limits skip ir f i = - - let rec scan_width i = - if i > lim then incomplete_format fmt else - match Sformat.get fmt i with - | '0' .. '9' as conv -> - let width, i = - read_int_literal (decimal_value_of_char conv) (succ i) in - Some width, i - | _ -> None, i - - and scan_precision i = - begin - match Sformat.get fmt i with - | '.' -> - let precision, i = read_int_literal 0 (succ i) in - (Some precision, i) - | _ -> None, i - end +(* Compute the width of a padding option (see "%42{" and "%123("). *) +let width_of_pad_opt pad_opt = match pad_opt with + | None -> max_int + | Some width -> width - and read_int_literal accu i = - if i > lim then accu, i else - match Sformat.unsafe_get fmt i with - | '0' .. '9' as c -> - let accu = 10 * accu + decimal_value_of_char c in - read_int_literal accu (succ i) - | _ -> accu, i in - - if i > lim then ir, f else - let width_opt, i = scan_width i in - let prec_opt, i = scan_precision i in - scan_conversion skip width_opt prec_opt ir f i - - and scan_conversion skip width_opt prec_opt ir f i = - let stack = if skip then no_stack else stack in - let width = int_of_width_opt width_opt in - let prec = int_of_prec_opt prec_opt in - match Sformat.get fmt i with - | '%' | '@' as c -> - check_char ib c; - scan_fmt ir f (succ i) - | '!' -> - if not (Scanning.end_of_input ib) - then bad_input "end of input not found" else - scan_fmt ir f (succ i) - | ',' -> - scan_fmt ir f (succ i) - | 's' -> - let i, stp = scan_indication (succ i) in - let _x = scan_string stp width ib in - scan_fmt ir (stack f (token_string ib)) (succ i) - | 'S' -> - let _x = scan_String width ib in - scan_fmt ir (stack f (token_string ib)) (succ i) - | '[' (* ']' *) -> - let i, char_set = scan_range fmt (succ i) in - let i, stp = scan_indication (succ i) in - let _x = scan_chars_in_char_set stp char_set width ib in - scan_fmt ir (stack f (token_string ib)) (succ i) - | ('c' | 'C') when width = 0 -> - let c = Scanning.checked_peek_char ib in - scan_fmt ir (stack f c) (succ i) - | 'c' -> - let _x = scan_char width ib in - scan_fmt ir (stack f (token_char ib)) (succ i) - | 'C' -> - let _x = scan_Char width ib in - scan_fmt ir (stack f (token_char ib)) (succ i) - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv -> - let _x = scan_int_conv conv width prec ib in - scan_fmt ir (stack f (token_int conv ib)) (succ i) - | 'N' as conv -> - scan_fmt ir (stack f (get_count conv ib)) (succ i) - | 'f' | 'e' | 'E' | 'g' | 'G' -> - let _x = scan_float width prec ib in - scan_fmt ir (stack f (token_float ib)) (succ i) - | 'F' -> - let _x = scan_Float width prec ib in - scan_fmt ir (stack f (token_float ib)) (succ i) -(* | 'B' | 'b' when width = Some 0 -> - let _x = scan_bool width ib in - scan_fmt ir (stack f (token_int ib)) (succ i) *) - | 'B' | 'b' -> - let _x = scan_bool width ib in - scan_fmt ir (stack f (token_bool ib)) (succ i) - | 'r' -> - if ir > limr then assert false else - let token = Obj.magic rv.(ir) ib in - scan_fmt (succ ir) (stack f token) (succ i) - | 'l' | 'n' | 'L' as conv0 -> - let i = succ i in - if i > lim then scan_fmt ir (stack f (get_count conv0 ib)) i else begin - match Sformat.get fmt i with - (* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *) - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv1 -> - let _x = scan_int_conv conv1 width prec ib in - (* Look back to the character that triggered the integer conversion - (this character is either 'l', 'n' or 'L') to find the - conversion to apply to the integer token read. *) - begin match conv0 with - | 'l' -> scan_fmt ir (stack f (token_int32 conv1 ib)) (succ i) - | 'n' -> scan_fmt ir (stack f (token_nativeint conv1 ib)) (succ i) - | _ -> scan_fmt ir (stack f (token_int64 conv1 ib)) (succ i) end - (* This is not an integer conversion, but a regular %l, %n or %L. *) - | _ -> scan_fmt ir (stack f (get_count conv0 ib)) i end - | '(' | '{' as conv (* ')' '}' *) -> - let i = succ i in - (* Find [mf], the static specification for the format to read. *) - let j = - Tformat.sub_format - incomplete_format bad_conversion conv fmt i in - let mf = Sformat.sub fmt (Sformat.index_of_int i) (j - 2 - i) in - (* Read [rf], the specified format string in the input buffer, - and check its correctness w.r.t. [mf]. *) - let _x = scan_String width ib in - let rf = token_string ib in - if not (compatible_format_type rf mf) then format_mismatch rf mf else - (* Proceed according to the kind of metaformat found: - - %{ mf %} simply returns [rf] as the token read, - - %( mf %) returns [rf] as the first token read, then - returns a second token obtained by scanning the input with - format string [rf]. - Behaviour for %( mf %) is mandatory for sake of format string - typechecking specification. To get pure format string - substitution behaviour, you should use %_( mf %) that skips the - first (format string) token and hence properly substitutes [mf] by - [rf] in the format string argument. +let stopper_of_formatting_lit fmting = + if fmting = Escaped_percent then '%', "" else + let str = string_of_formatting_lit fmting in + let stp = str.[1] in + let sub_str = String.sub str 2 (String.length str - 2) in + stp, sub_str + +(******************************************************************************) + (* Readers managment *) + +(* A call to take_format_readers on a format is evaluated into functions + taking readers as arguments and aggregate them into an heterogeneous list *) +(* When all readers are taken, finally pass the list of the readers to the + continuation k. *) +let rec take_format_readers : type a c d e f . + ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, e, f) fmt -> + d = +fun k fmt -> match fmt with + | Reader fmt_rest -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_format_readers new_k fmt_rest + | Char rest -> take_format_readers k rest + | Caml_char rest -> take_format_readers k rest + | String (_, rest) -> take_format_readers k rest + | Caml_string (_, rest) -> take_format_readers k rest + | Int (_, _, _, rest) -> take_format_readers k rest + | Int32 (_, _, _, rest) -> take_format_readers k rest + | Nativeint (_, _, _, rest) -> take_format_readers k rest + | Int64 (_, _, _, rest) -> take_format_readers k rest + | Float (_, _, _, rest) -> take_format_readers k rest + | Bool rest -> take_format_readers k rest + | Alpha rest -> take_format_readers k rest + | Theta rest -> take_format_readers k rest + | Flush rest -> take_format_readers k rest + | String_literal (_, rest) -> take_format_readers k rest + | Char_literal (_, rest) -> take_format_readers k rest + + | Scan_char_set (_, _, rest) -> take_format_readers k rest + | Scan_get_counter (_, rest) -> take_format_readers k rest + + | Formatting_lit (_, rest) -> take_format_readers k rest + | Formatting_gen (Open_tag (Format (fmt, _)), rest) -> take_format_readers k (concat_fmt fmt rest) + | Formatting_gen (Open_box (Format (fmt, _)), rest) -> take_format_readers k (concat_fmt fmt rest) + + | Format_arg (_, _, rest) -> take_format_readers k rest + | Format_subst (_, fmtty, rest) -> + take_fmtty_format_readers k (erase_rel (symm fmtty)) rest + | Ignored_param (ign, rest) -> take_ignored_format_readers k ign rest + + | End_of_format -> k Nil + +(* Take readers associated to an fmtty coming from a Format_subst "%(...%)". *) +and take_fmtty_format_readers : type x y a c d e f . + ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) fmtty -> + (y, Scanning.in_channel, c, x, e, f) fmt -> d = +fun k fmtty fmt -> match fmtty with + | Reader_ty fmt_rest -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_fmtty_format_readers new_k fmt_rest fmt + | Ignored_reader_ty fmt_rest -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_fmtty_format_readers new_k fmt_rest fmt + | Char_ty rest -> take_fmtty_format_readers k rest fmt + | String_ty rest -> take_fmtty_format_readers k rest fmt + | Int_ty rest -> take_fmtty_format_readers k rest fmt + | Int32_ty rest -> take_fmtty_format_readers k rest fmt + | Nativeint_ty rest -> take_fmtty_format_readers k rest fmt + | Int64_ty rest -> take_fmtty_format_readers k rest fmt + | Float_ty rest -> take_fmtty_format_readers k rest fmt + | Bool_ty rest -> take_fmtty_format_readers k rest fmt + | Alpha_ty rest -> take_fmtty_format_readers k rest fmt + | Theta_ty rest -> take_fmtty_format_readers k rest fmt + | Format_arg_ty (_, rest) -> take_fmtty_format_readers k rest fmt + | End_of_fmtty -> take_format_readers k fmt + | Format_subst_ty (ty1, ty2, rest) -> + let ty = trans (symm ty1) ty2 in + take_fmtty_format_readers k (concat_fmtty ty rest) fmt + +(* Take readers associated to an ignored parameter. *) +and take_ignored_format_readers : type x y a c d e f . + ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) ignored -> + (y, Scanning.in_channel, c, x, e, f) fmt -> d = +fun k ign fmt -> match ign with + | Ignored_reader -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_format_readers new_k fmt + | Ignored_char -> take_format_readers k fmt + | Ignored_caml_char -> take_format_readers k fmt + | Ignored_string _ -> take_format_readers k fmt + | Ignored_caml_string _ -> take_format_readers k fmt + | Ignored_int (_, _) -> take_format_readers k fmt + | Ignored_int32 (_, _) -> take_format_readers k fmt + | Ignored_nativeint (_, _) -> take_format_readers k fmt + | Ignored_int64 (_, _) -> take_format_readers k fmt + | Ignored_float (_, _) -> take_format_readers k fmt + | Ignored_bool -> take_format_readers k fmt + | Ignored_format_arg _ -> take_format_readers k fmt + | Ignored_format_subst (_, fmtty) -> take_fmtty_format_readers k fmtty fmt + | Ignored_scan_char_set _ -> take_format_readers k fmt + | Ignored_scan_get_counter _ -> take_format_readers k fmt + +(******************************************************************************) + (* Generic scanning *) + +(* Make a generic scanning function. *) +(* Scan a stream according to a format and readers obtained by + take_format_readers, and aggegate scanned values into an + heterogeneous list. *) +(* Return the heterogeneous list of scanned values. *) +let rec make_scanf : type a c d e f . + Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt -> + (d, _) heter_list -> (a, f) heter_list = +fun ib fmt readers -> match fmt with + | Char rest -> + let _ = scan_char 0 ib in + let c = token_char ib in + Cons (c, make_scanf ib rest readers) + | Caml_char rest -> + let _ = scan_caml_char 0 ib in + let c = token_char ib in + Cons (c, make_scanf ib rest readers) + + | String (pad, Formatting_lit (fmting_lit, rest)) -> + let stp, str = stopper_of_formatting_lit fmting_lit in + let scan width _ ib = scan_string (Some stp) width ib in + let str_rest = String_literal (str, rest) in + pad_prec_scanf ib str_rest readers pad No_precision scan token_string + | String (pad, rest) -> + let scan width _ ib = scan_string None width ib in + pad_prec_scanf ib rest readers pad No_precision scan token_string + + | Caml_string (pad, rest) -> + let scan width _ ib = scan_caml_string width ib in + pad_prec_scanf ib rest readers pad No_precision scan token_string + | Int (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_int c) + | Int32 (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_int32 c) + | Nativeint (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_nativeint c) + | Int64 (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_int64 c) + | Float (Float_F, pad, prec, rest) -> + pad_prec_scanf ib rest readers pad prec scan_caml_float token_float + | Float ((Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se + | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg + | Float_G | Float_pG | Float_sG), pad, prec, rest) -> + pad_prec_scanf ib rest readers pad prec scan_float token_float + + | Bool rest -> + let _ = scan_bool ib in + let b = token_bool ib in + Cons (b, make_scanf ib rest readers) + | Alpha _ -> + invalid_arg "scanf: bad conversion \"%a\"" + | Theta _ -> + invalid_arg "scanf: bad conversion \"%t\"" + | Reader fmt_rest -> + let Cons (reader, readers_rest) = readers in + let x = reader ib in + Cons (x, make_scanf ib fmt_rest readers_rest) + | Flush rest -> + if Scanning.end_of_input ib then make_scanf ib rest readers + else bad_input "end of input not found" + + | String_literal (str, rest) -> + String.iter (check_char ib) str; + make_scanf ib rest readers + | Char_literal (chr, rest) -> + check_char ib chr; + make_scanf ib rest readers + + | Format_arg (pad_opt, fmtty, rest) -> + let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in + let s = token_string ib in + let fmt = + try format_of_string_fmtty s fmtty + with Failure msg -> bad_input msg + in + Cons (fmt, make_scanf ib rest readers) + | Format_subst (pad_opt, fmtty, rest) -> + let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in + let s = token_string ib in + let fmt, fmt' = + try + let Fmt_EBB fmt = fmt_ebb_of_string s in + let Fmt_EBB fmt' = fmt_ebb_of_string s in + (* TODO: find a way to avoid reparsing twice *) + + (* TODO: these type-checks below *can* fail because of type + ambiguity in presence of ignored-readers: "%_r%d" and "%d%_r" + are typed in the same way. + + # Scanf.sscanf "\"%_r%d\"3" "%(%d%_r%)" ignore + (fun fmt n -> string_of_format fmt, n);; + Exception: CamlinternalFormat.Type_mismatch. + + We should properly catch this exception. *) - (* For conversion %{%}, just return this format string as the token - read and go on with the rest of the format string argument. *) - if conv = '{' (* '}' *) then scan_fmt ir (stack f rf) j else - (* Or else, return this format string as the first token read; - then continue scanning using this format string to get - the following token read; - finally go on with the rest of the format string argument. *) - let ir, nf = scan (string_to_format rf) ir (stack f rf) 0 in - (* Return the format string read and the value just read, - then go on with the rest of the format. *) - scan_fmt ir nf j - - | c -> bad_conversion fmt i c - - and scan_indication j = - if j > lim then j - 1, [] else - match Sformat.get fmt j with - | '@' -> - let k = j + 1 in - if k > lim then j - 1, [] else - begin match Sformat.get fmt k with - | '%' -> - let k = k + 1 in - if k > lim then j - 1, [] else - begin match Sformat.get fmt k with - | '%' | '@' as c -> k, [ c ] - | _c -> j - 1, [] - end - | c -> k, [ c ] - end - | _c -> j - 1, [] in - - scan_fmt in - - - Scanning.reset_token ib; - - let v = - try snd (scan fmt 0 (fun () -> f) 0) with - | (Scan_failure _ | Failure _ | End_of_file) as exc -> - stack (delay ef ib) exc in - return v -;; - -let mkscanf ib ef fmt = - let sc = scan_format ib ef in - ascanf sc fmt -;; - -let kscanf ib ef fmt = mkscanf ib ef fmt;; - -let bscanf ib = kscanf ib scanf_bad_input;; - -let fscanf ic = bscanf (Scanning.from_channel ic);; - -let sscanf : string -> ('a, 'b, 'c, 'd) scanner - = fun s -> bscanf (Scanning.from_string s);; - -let scanf fmt = bscanf Scanning.stdib fmt;; - -let bscanf_format ib fmt f = - let fmt = Sformat.unsafe_to_string fmt in - let fmt1 = - ignore (scan_String max_int ib); - token_string ib in - if not (compatible_format_type fmt1 fmt) then - format_mismatch fmt1 fmt else - f (string_to_format fmt1) -;; + type_format fmt (erase_rel fmtty), + type_format fmt' (erase_rel (symm fmtty)) + with Failure msg -> bad_input msg + in + Cons (Format (fmt, s), + make_scanf ib (concat_fmt fmt' rest) readers) + + | Scan_char_set (width_opt, char_set, Formatting_lit (fmting_lit, rest)) -> + let stp, str = stopper_of_formatting_lit fmting_lit in + let width = width_of_pad_opt width_opt in + let _ = scan_chars_in_char_set char_set (Some stp) width ib in + let s = token_string ib in + let str_rest = String_literal (str, rest) in + Cons (s, make_scanf ib str_rest readers) + | Scan_char_set (width_opt, char_set, rest) -> + let width = width_of_pad_opt width_opt in + let _ = scan_chars_in_char_set char_set None width ib in + let s = token_string ib in + Cons (s, make_scanf ib rest readers) + | Scan_get_counter (counter, rest) -> + let count = get_counter ib counter in + Cons (count, make_scanf ib rest readers) + + | Formatting_lit (formatting_lit, rest) -> + String.iter (check_char ib) (string_of_formatting_lit formatting_lit); + make_scanf ib rest readers + | Formatting_gen (Open_tag (Format (fmt', _)), rest) -> + check_char ib '@'; check_char ib '{'; + make_scanf ib (concat_fmt fmt' rest) readers + | Formatting_gen (Open_box (Format (fmt', _)), rest) -> + check_char ib '@'; check_char ib '['; + make_scanf ib (concat_fmt fmt' rest) readers + + | Ignored_param (ign, rest) -> + let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in + begin match make_scanf ib fmt' readers with + | Cons (_, arg_rest) -> arg_rest + | Nil -> assert false + end -let sscanf_format s fmt = bscanf_format (Scanning.from_string s) fmt;; + | End_of_format -> + Nil + +(* Case analysis on padding and precision. *) +(* Reject formats containing "%*" or "%.*". *) +(* Pass padding and precision to the generic scanner `scan'. *) +and pad_prec_scanf : type a c d e f x y z t . + Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt -> + (d, _) heter_list -> (x, y) padding -> (y, z -> a) precision -> + (int -> int -> Scanning.in_channel -> t) -> + (Scanning.in_channel -> z) -> + (x, f) heter_list = +fun ib fmt readers pad prec scan token -> match pad, prec with + | No_padding, No_precision -> + let _ = scan max_int max_int ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | No_padding, Lit_precision p -> + let _ = scan max_int p ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | Lit_padding ((Right | Zeros), w), No_precision -> + let _ = scan w max_int ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | Lit_padding ((Right | Zeros), w), Lit_precision p -> + let _ = scan w p ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | Lit_padding (Left, _), _ -> + invalid_arg "scanf: bad conversion \"%-\"" + | Lit_padding ((Right | Zeros), _), Arg_precision -> + invalid_arg "scanf: bad conversion \"%*\"" + | Arg_padding _, _ -> + invalid_arg "scanf: bad conversion \"%*\"" + | No_padding, Arg_precision -> + invalid_arg "scanf: bad conversion \"%*\"" + +(******************************************************************************) + (* Defining [scanf] and various flavors of [scanf] *) + +type 'a kscanf_result = Args of 'a | Exc of exn + +let kscanf ib ef (Format (fmt, str)) = + let rec apply : type a b . a -> (a, b) heter_list -> b = + fun f args -> match args with + | Cons (x, r) -> apply (f x) r + | Nil -> f + in + let k readers f = + Scanning.reset_token ib; + match try Args (make_scanf ib fmt readers) with + | (Scan_failure _ | Failure _ | End_of_file) as exc -> Exc exc + | Invalid_argument msg -> + invalid_arg (msg ^ " in format \"" ^ String.escaped str ^ "\"") + with + | Args args -> apply f args + | Exc exc -> ef ib exc + in + take_format_readers k fmt + +let kbscanf = kscanf + +(***) + +let ksscanf s ef fmt = kbscanf (Scanning.from_string s) ef fmt +let kfscanf ic ef fmt = kbscanf (Scanning.from_channel ic) ef fmt +let bscanf ib fmt = kscanf ib scanf_bad_input fmt +let fscanf ic fmt = kscanf (Scanning.from_channel ic) scanf_bad_input fmt +let sscanf s fmt = kscanf (Scanning.from_string s) scanf_bad_input fmt +let scanf fmt = kscanf Scanning.stdib scanf_bad_input fmt + +(***) + +let bscanf_format : Scanning.in_channel -> ('a, 'b, 'c, 'd, 'e, 'f) format6 -> + (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g = + fun ib format f -> + let _ = scan_caml_string max_int ib in + let str = token_string ib in + let fmt' = + try format_of_string_format str format + with Failure msg -> bad_input msg + in + f fmt' + +let sscanf_format : string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 -> + (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g = + fun s format f -> bscanf_format (Scanning.from_string s) format f let string_to_String s = let l = String.length s in @@ -1566,9 +1352,3 @@ let format_from_string s fmt = let unescaped s = sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x) ;; - -(* - Local Variables: - compile-command: "cd ..; make world" - End: -*) diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index a1b3d1acb..297d6f2d5 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -485,6 +485,16 @@ val kscanf : exception that aborted the scanning process as arguments. *) +val ksscanf : + string -> (Scanning.in_channel -> exn -> 'd) -> + ('a, 'b, 'c, 'd) scanner +(** Same as {!Scanf.kscanf} but reads from the given string. *) + +val kfscanf : + Pervasives.in_channel -> (Scanning.in_channel -> exn -> 'd) -> + ('a, 'b, 'c, 'd) scanner +(** Same as {!Scanf.kscanf}, but reads from the given regular input channel. *) + (** {6 Reading format strings from input} *) val bscanf_format : diff --git a/stdlib/set.ml b/stdlib/set.ml index 1d049b139..5c6212d88 100644 --- a/stdlib/set.ml +++ b/stdlib/set.ml @@ -363,7 +363,8 @@ module Make(Ord: OrderedType) = | 0, l -> Empty, l | 1, x0 :: l -> Node (Empty, x0, Empty, 1), l | 2, x0 :: x1 :: l -> Node (Node(Empty, x0, Empty, 1), x1, Empty, 2), l - | 3, x0 :: x1 :: x2 :: l -> Node (Node(Empty, x0, Empty, 1), x1, Node(Empty, x2, Empty, 1), 2), l + | 3, x0 :: x1 :: x2 :: l -> + Node (Node(Empty, x0, Empty, 1), x1, Node(Empty, x2, Empty, 1), 2),l | n, l -> let nl = n / 2 in let left, l = sub nl l in diff --git a/stdlib/set.mli b/stdlib/set.mli index 556ee9388..1b67398ee 100644 --- a/stdlib/set.mli +++ b/stdlib/set.mli @@ -19,6 +19,27 @@ The implementation uses balanced binary trees, and is therefore reasonably efficient: insertion and membership take time logarithmic in the size of the set, for instance. + + The [Make] functor constructs implementations for any type, given a + [compare] function. + For instance: + {[ + module IntPairs = + struct + type t = int * int + let compare (x0,y0) (x1,y1) = + match Pervasives.compare x0 x1 with + 0 -> Pervasives.compare y0 y1 + | c -> c + end + + module PairsSet = Set.Make(IntPairs) + + let m = PairsSet.(empty |> add (2,3) |> add (5,7) |> add (11,13)) + ]} + + This creates a new module [PairsSet], with a new type [PairsSet.t] + of sets of [int * int]. *) module type OrderedType = diff --git a/stdlib/sort.mli b/stdlib/sort.mli index d5abb79fa..a9be27e13 100644 --- a/stdlib/sort.mli +++ b/stdlib/sort.mli @@ -20,11 +20,13 @@ *) val list : ('a -> 'a -> bool) -> 'a list -> 'a list + [@@ocaml.deprecated] (** Sort a list in increasing order according to an ordering predicate. The predicate should return [true] if its first argument is less than or equal to its second argument. *) val array : ('a -> 'a -> bool) -> 'a array -> unit + [@@ocaml.deprecated] (** Sort an array in increasing order according to an ordering predicate. The predicate should return [true] if its first argument is @@ -32,6 +34,7 @@ val array : ('a -> 'a -> bool) -> 'a array -> unit The array is sorted in place. *) val merge : ('a -> 'a -> bool) -> 'a list -> 'a list -> 'a list + [@@ocaml.deprecated] (** Merge two lists according to the given predicate. Assuming the two argument lists are sorted according to the predicate, [merge] returns a sorted list containing the elements diff --git a/stdlib/stdLabels.ml b/stdlib/stdLabels.ml index 35b25e0b7..60713ca0c 100644 --- a/stdlib/stdLabels.ml +++ b/stdlib/stdLabels.ml @@ -18,3 +18,5 @@ module Array = ArrayLabels module List = ListLabels module String = StringLabels + +module Bytes = BytesLabels diff --git a/stdlib/stdLabels.mli b/stdlib/stdLabels.mli index 144936f17..c607a9987 100644 --- a/stdlib/stdLabels.mli +++ b/stdlib/stdLabels.mli @@ -14,12 +14,14 @@ (** Standard labeled libraries. This meta-module provides labelized version of the {!Array}, - {!List} and {!String} modules. + {!Bytes}, {!List} and {!String} modules. They only differ by their labels. Detailed interfaces can be found - in [arrayLabels.mli], [listLabels.mli] and [stringLabels.mli]. + in [arrayLabels.mli], [bytesLabels.mli], [listLabels.mli] + and [stringLabels.mli]. *) module Array = ArrayLabels +module Bytes = BytesLabels module List = ListLabels module String = StringLabels diff --git a/stdlib/stream.ml b/stdlib/stream.ml index 753bce005..751c741a8 100644 --- a/stdlib/stream.ml +++ b/stdlib/stream.ml @@ -25,7 +25,7 @@ and 'a data = | Sbuffio of buffio and 'a gen = { mutable curr : 'a option option; func : int -> 'a option } and buffio = - { ic : in_channel; buff : string; mutable len : int; mutable ind : int } + { ic : in_channel; buff : bytes; mutable len : int; mutable ind : int } ;; exception Failure;; exception Error of string;; @@ -37,7 +37,7 @@ let set_data (s : 'a t) (d : 'a data) = ;; let fill_buff b = - b.len <- input b.ic b.buff 0 (String.length b.buff); b.ind <- 0 + b.len <- input b.ic b.buff 0 (Bytes.length b.buff); b.ind <- 0 ;; let rec get_data count d = match d with @@ -64,7 +64,7 @@ let rec get_data count d = match d with | Sbuffio b -> if b.ind >= b.len then fill_buff b; if b.len == 0 then Sempty else - let r = Obj.magic (String.unsafe_get b.buff b.ind) in + let r = Obj.magic (Bytes.unsafe_get b.buff b.ind) in (* Warning: anyone using g thinks that an item has been read *) b.ind <- succ b.ind; Scons(r, d) | Slazy f -> get_data count (Lazy.force f) @@ -87,7 +87,7 @@ let rec peek s = | Sbuffio b -> if b.ind >= b.len then fill_buff b; if b.len == 0 then begin set_data s Sempty; None end - else Some (Obj.magic (String.unsafe_get b.buff b.ind)) + else Some (Obj.magic (Bytes.unsafe_get b.buff b.ind)) ;; let rec junk s = @@ -159,9 +159,18 @@ let of_string s = else None) ;; +let of_bytes s = + let count = ref 0 in + from (fun _ -> + let c = !count in + if c < Bytes.length s + then (incr count; Some (Bytes.get s c)) + else None) +;; + let of_channel ic = {count = 0; - data = Sbuffio {ic = ic; buff = String.create 4096; len = 0; ind = 0}} + data = Sbuffio {ic = ic; buff = Bytes.create 4096; len = 0; ind = 0}} ;; (* Stream expressions builders *) diff --git a/stdlib/stream.mli b/stdlib/stream.mli index aeb0da1e8..85a846102 100644 --- a/stdlib/stream.mli +++ b/stdlib/stream.mli @@ -46,6 +46,9 @@ val of_list : 'a list -> 'a t val of_string : string -> char t (** Return the stream of the characters of the string parameter. *) +val of_bytes : bytes -> char t +(** Return the stream of the characters of the bytes parameter. *) + val of_channel : in_channel -> char t (** Return the stream of the characters read from the input channel. *) diff --git a/stdlib/string.ml b/stdlib/string.ml index fda40b527..93880af26 100644 --- a/stdlib/string.ml +++ b/stdlib/string.ml @@ -2,72 +2,53 @@ (* *) (* OCaml *) (* *) -(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* Damien Doligez, projet Gallium, INRIA Rocquencourt *) (* *) -(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with *) (* the special exception on linking described in file ../LICENSE. *) (* *) (***********************************************************************) -(* String operations *) +(* String operations, based on byte sequence operations *) external length : string -> int = "%string_length" external get : string -> int -> char = "%string_safe_get" -external set : string -> int -> char -> unit = "%string_safe_set" -external create : int -> string = "caml_create_string" +external set : bytes -> int -> char -> unit = "%string_safe_set" +external create : int -> bytes = "caml_create_string" external unsafe_get : string -> int -> char = "%string_unsafe_get" -external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set" -external unsafe_blit : string -> int -> string -> int -> int -> unit +external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" +external unsafe_blit : string -> int -> bytes -> int -> int -> unit = "caml_blit_string" "noalloc" -external unsafe_fill : string -> int -> int -> char -> unit +external unsafe_fill : bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc" -let make n c = - let s = create n in - unsafe_fill s 0 n c; - s +module B = Bytes -let copy s = - let len = length s in - let r = create len in - unsafe_blit s 0 r 0 len; - r +let bts = B.unsafe_to_string +let bos = B.unsafe_of_string +let make n c = + B.make n c |> bts +let init n f = + B.init n f |> bts +let copy s = + B.copy (bos s) |> bts let sub s ofs len = - if ofs < 0 || len < 0 || ofs > length s - len - then invalid_arg "String.sub" - else begin - let r = create len in - unsafe_blit s ofs r 0 len; - r - end - -let fill s ofs len c = - if ofs < 0 || len < 0 || ofs > length s - len - then invalid_arg "String.fill" - else unsafe_fill s ofs len c - -let blit s1 ofs1 s2 ofs2 len = - if len < 0 || ofs1 < 0 || ofs1 > length s1 - len - || ofs2 < 0 || ofs2 > length s2 - len - then invalid_arg "String.blit" - else unsafe_blit s1 ofs1 s2 ofs2 len - -let iter f a = - for i = 0 to length a - 1 do f(unsafe_get a i) done - -let iteri f a = - for i = 0 to length a - 1 do f i (unsafe_get a i) done + B.sub (bos s) ofs len |> bts +let fill = + B.fill +let blit = + B.blit_string let concat sep l = match l with - [] -> "" + | [] -> "" | hd :: tl -> let num = ref 0 and len = ref 0 in List.iter (fun s -> incr num; len := !len + length s) l; - let r = create (!len + length sep * (!num - 1)) in + let r = B.create (!len + length sep * (!num - 1)) in unsafe_blit hd 0 r 0 (length hd); let pos = ref(length hd) in List.iter @@ -77,128 +58,68 @@ let concat sep l = unsafe_blit s 0 r !pos (length s); pos := !pos + length s) tl; - r + Bytes.unsafe_to_string r + +let iter f s = + B.iter f (bos s) +let iteri f s = + B.iteri f (bos s) +let map f s = + B.map f (bos s) |> bts +let mapi f s = + B.mapi f (bos s) |> bts + +(* Beware: we cannot use B.trim or B.escape because they always make a + copy, but String.mli spells out some cases where we are not allowed + to make a copy. *) external is_printable: char -> bool = "caml_is_printable" -external char_code: char -> int = "%identity" -external char_chr: int -> char = "%identity" let is_space = function | ' ' | '\012' | '\n' | '\r' | '\t' -> true | _ -> false let trim s = - let len = length s in - let i = ref 0 in - while !i < len && is_space (unsafe_get s !i) do - incr i - done; - let j = ref (len - 1) in - while !j >= !i && is_space (unsafe_get s !j) do - decr j - done; - if !i = 0 && !j = len - 1 then - s - else if !j >= !i then - sub s !i (!j - !i + 1) - else - "" + if s = "" then s + else if is_space (unsafe_get s 0) || is_space (unsafe_get s (length s - 1)) + then bts (B.trim (bos s)) + else s let escaped s = - let n = ref 0 in - for i = 0 to length s - 1 do - n := !n + - (match unsafe_get s i with - | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2 - | c -> if is_printable c then 1 else 4) - done; - if !n = length s then s else begin - let s' = create !n in - n := 0; - for i = 0 to length s - 1 do - begin - match unsafe_get s i with - | ('"' | '\\') as c -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c - | '\n' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n' - | '\t' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't' - | '\r' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'r' - | '\b' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'b' - | c -> - if is_printable c then - unsafe_set s' !n c - else begin - let a = char_code c in - unsafe_set s' !n '\\'; - incr n; - unsafe_set s' !n (char_chr (48 + a / 100)); - incr n; - unsafe_set s' !n (char_chr (48 + (a / 10) mod 10)); - incr n; - unsafe_set s' !n (char_chr (48 + a mod 10)) - end - end; - incr n - done; - s' - end - -let map f s = - let l = length s in - if l = 0 then s else begin - let r = create l in - for i = 0 to l - 1 do unsafe_set r i (f(unsafe_get s i)) done; - r - end - -let uppercase s = map Char.uppercase s -let lowercase s = map Char.lowercase s - -let apply1 f s = - if length s = 0 then s else begin - let r = copy s in - unsafe_set r 0 (f(unsafe_get s 0)); - r - end - -let capitalize s = apply1 Char.uppercase s -let uncapitalize s = apply1 Char.lowercase s - -let rec index_rec s lim i c = - if i >= lim then raise Not_found else - if unsafe_get s i = c then i else index_rec s lim (i + 1) c;; - -let index s c = index_rec s (length s) 0 c;; - -let index_from s i c = - let l = length s in - if i < 0 || i > l then invalid_arg "String.index_from" else - index_rec s l i c;; - -let rec rindex_rec s i c = - if i < 0 then raise Not_found else - if unsafe_get s i = c then i else rindex_rec s (i - 1) c;; - -let rindex s c = rindex_rec s (length s - 1) c;; + let rec needs_escape i = + if i >= length s then false else + match unsafe_get s i with + | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> true + | c when is_printable c -> needs_escape (i+1) + | _ -> true + in + if needs_escape 0 then + bts (B.escaped (bos s)) + else + s +let index s c = + B.index (bos s) c +let rindex s c = + B.rindex (bos s) c +let index_from s i c= + B.index_from (bos s) i c let rindex_from s i c = - if i < -1 || i >= length s then invalid_arg "String.rindex_from" else - rindex_rec s i c;; - + B.rindex_from (bos s) i c +let contains s c = + B.contains (bos s) c let contains_from s i c = - let l = length s in - if i < 0 || i > l then invalid_arg "String.contains_from" else - try ignore (index_rec s l i c); true with Not_found -> false;; - -let contains s c = contains_from s 0 c;; - + B.contains_from (bos s) i c let rcontains_from s i c = - if i < 0 || i >= length s then invalid_arg "String.rcontains_from" else - try ignore (rindex_rec s i c); true with Not_found -> false;; + B.rcontains_from (bos s) i c +let uppercase s = + B.uppercase (bos s) |> bts +let lowercase s = + B.lowercase (bos s) |> bts +let capitalize s = + B.capitalize (bos s) |> bts +let uncapitalize s = + B.uncapitalize (bos s) |> bts type t = string diff --git a/stdlib/string.mli b/stdlib/string.mli index 14f2c82db..8f1e178b5 100644 --- a/stdlib/string.mli +++ b/stdlib/string.mli @@ -13,41 +13,36 @@ (** String operations. - Given a string [s] of length [l], we call character number in [s] - the index of a character in [s]. Indexes start at [0], and we will - call a character number valid in [s] if it falls within the range - [[0...l-1]]. A position is the point between two characters or at - the beginning or end of the string. We call a position valid - in [s] if it falls within the range [[0...l]]. Note that character - number [n] is between positions [n] and [n+1]. + A string is an immutable data structure that contains a + fixed-length sequence of (single-byte) characters. Each character + can be accessed in constant time through its index. + + Given a string [s] of length [l], we can access each of the [l] + characters of [s] via its index in the sequence. Indexes start at + [0], and we will call an index valid in [s] if it falls within the + range [[0...l-1]] (inclusive). A position is the point between two + characters or at the beginning or end of the string. We call a + position valid in [s] if it falls within the range [[0...l]] + (inclusive). Note that the character at index [n] is between + positions [n] and [n+1]. Two parameters [start] and [len] are said to designate a valid substring of [s] if [len >= 0] and [start] and [start+len] are valid positions in [s]. - OCaml strings can be modified in place, for instance via the - {!String.set} and {!String.blit} functions described below. This - possibility should be used rarely and with much care, however, since - both the OCaml compiler and most OCaml libraries share strings as if - they were immutable, rather than copying them. In particular, - string literals are shared: a single copy of the string is created - at program loading time and returned by all evaluations of the - string literal. Consider for example: - - {[ - # let f () = "foo";; - val f : unit -> string = <fun> - # (f ()).[0] <- 'b';; - - : unit = () - # f ();; - - : string = "boo" - ]} - - Likewise, many functions from the standard library can return string - literals or one of their string arguments. Therefore, the returned strings - must not be modified directly. If mutation is absolutely necessary, - it should be performed on a fresh copy of the string, as produced by - {!String.copy}. + OCaml strings used to be modifiable in place, for instance via the + {!String.set} and {!String.blit} functions described below. This + usage is deprecated and only possible when the compiler is put in + "unsafe-string" mode by giving the [-unsafe-string] command-line + option (which is currently the default for reasons of backward + compatibility). This is done by making the types [string] and + [bytes] (see module {!Bytes}) interchangeable so that functions + expecting byte sequences can also accept strings as arguments and + modify them. + + All new code should avoid this feature and be compiled with the + [-safe-string] command-line option to enforce the separation between + the types [string] and [bytes]. *) @@ -55,33 +50,51 @@ external length : string -> int = "%string_length" (** Return the length (number of characters) of the given string. *) external get : string -> int -> char = "%string_safe_get" -(** [String.get s n] returns character number [n] in string [s]. +(** [String.get s n] returns the character at index [n] in string [s]. You can also write [s.[n]] instead of [String.get s n]. - Raise [Invalid_argument] if [n] not a valid character number in [s]. *) + Raise [Invalid_argument] if [n] not a valid index in [s]. *) -external set : string -> int -> char -> unit = "%string_safe_set" -(** [String.set s n c] modifies string [s] in place, - replacing the character number [n] by [c]. +external set : bytes -> int -> char -> unit = "%string_safe_set" + [@@ocaml.deprecated] +(** [String.set s n c] modifies byte sequence [s] in place, + replacing the byte at index [n] with [c]. You can also write [s.[n] <- c] instead of [String.set s n c]. - Raise [Invalid_argument] if [n] is not a valid character number in [s]. *) + Raise [Invalid_argument] if [n] is not a valid index in [s]. -external create : int -> string = "caml_create_string" -(** [String.create n] returns a fresh string of length [n]. - The string initially contains arbitrary characters. + @deprecated This is a deprecated alias of {!Bytes.set}.[ ] *) - Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) +external create : int -> bytes = "caml_create_string" [@@ocaml.deprecated] +(** [String.create n] returns a fresh byte sequence of length [n]. + The sequence is uninitialized and contains arbitrary bytes. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. + + @deprecated This is a deprecated alias of {!Bytes.create}.[ ] *) val make : int -> char -> string (** [String.make n c] returns a fresh string of length [n], filled with the character [c]. - Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}.*) + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val init : int -> (int -> char) -> string +(** [String.init n f] returns a string of length [n], with character + [i] initialized to the result of [f i] (called in increasing + index order). + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. + + @since 4.02.0 +*) -val copy : string -> string -(** Return a copy of the given string. *) +val copy : string -> string [@@ocaml.deprecated] +(** Return a copy of the given string. + + @deprecated Because strings are immutable, it doesn't make much + sense to make identical copies of them. *) val sub : string -> int -> int -> string (** [String.sub s start len] returns a fresh string of length [len], @@ -91,27 +104,24 @@ val sub : string -> int -> int -> string Raise [Invalid_argument] if [start] and [len] do not designate a valid substring of [s]. *) -val fill : string -> int -> int -> char -> unit -(** [String.fill s start len c] modifies string [s] in place, - replacing [len] characters by [c], starting at [start]. +val fill : bytes -> int -> int -> char -> unit [@@ocaml.deprecated] +(** [String.fill s start len c] modifies byte sequence [s] in place, + replacing [len] bytes with [c], starting at [start]. Raise [Invalid_argument] if [start] and [len] do not - designate a valid substring of [s]. *) + designate a valid range of [s]. -val blit : string -> int -> string -> int -> int -> unit -(** [String.blit src srcoff dst dstoff len] copies [len] characters - from string [src], starting at character number [srcoff], to - string [dst], starting at character number [dstoff]. It works - correctly even if [src] and [dst] are the same string, - and the source and destination intervals overlap. + @deprecated This is a deprecated alias of {!Bytes.fill}.[ ] *) - Raise [Invalid_argument] if [srcoff] and [len] do not - designate a valid substring of [src], or if [dstoff] and [len] - do not designate a valid substring of [dst]. *) +val blit : string -> int -> bytes -> int -> int -> unit +(** Same as {!Bytes.blit_string}. *) val concat : string -> string list -> string (** [String.concat sep sl] concatenates the list of strings [sl], - inserting the separator string [sep] between each. *) + inserting the separator string [sep] between each. + + Raise [Invalid_argument] if the result is longer than + {!Sys.max_string_length} bytes. *) val iter : (char -> unit) -> string -> unit (** [String.iter f s] applies function [f] in turn to all @@ -122,19 +132,24 @@ val iteri : (int -> char -> unit) -> string -> unit (** Same as {!String.iter}, but the function is applied to the index of the element as first argument (counting from 0), and the character itself as second argument. - @since 4.00.0 -*) + @since 4.00.0 *) val map : (char -> char) -> string -> string -(** [String.map f s] applies function [f] in turn to all - the characters of [s] and stores the results in a new string that - is returned. - @since 4.00.0 *) +(** [String.map f s] applies function [f] in turn to all the + characters of [s] (in increasing index order) and stores the + results in a new string that is returned. + @since 4.00.0 *) + +val mapi : (int -> char -> char) -> string -> string +(** [String.mapi f s] calls [f] with each character of [s] and its + index (in increasing index order) and stores the results in a new + string that is returned. + @since 4.02.0 *) val trim : string -> string (** Return a copy of the argument, without leading and trailing whitespace. The characters regarded as whitespace are: [' '], - ['\012'], ['\n'], ['\r'], and ['\t']. If there is no leading nor + ['\012'], ['\n'], ['\r'], and ['\t']. If there is neither leading nor trailing whitespace character in the argument, return the original string itself, not a copy. @since 4.00.0 *) @@ -144,22 +159,25 @@ val escaped : string -> string represented by escape sequences, following the lexical conventions of OCaml. If there is no special character in the argument, return the original string itself, - not a copy. Its inverse function is Scanf.unescaped. *) + not a copy. Its inverse function is Scanf.unescaped. + + Raise [Invalid_argument] if the result is longer than + {!Sys.max_string_length} bytes. *) val index : string -> char -> int -(** [String.index s c] returns the character number of the first +(** [String.index s c] returns the index of the first occurrence of character [c] in string [s]. Raise [Not_found] if [c] does not occur in [s]. *) val rindex : string -> char -> int -(** [String.rindex s c] returns the character number of the last +(** [String.rindex s c] returns the index of the last occurrence of character [c] in string [s]. Raise [Not_found] if [c] does not occur in [s]. *) val index_from : string -> int -> char -> int -(** [String.index_from s i c] returns the character number of the +(** [String.index_from s i c] returns the index of the first occurrence of character [c] in string [s] after position [i]. [String.index s c] is equivalent to [String.index_from s 0 c]. @@ -167,7 +185,7 @@ val index_from : string -> int -> char -> int Raise [Not_found] if [c] does not occur in [s] after position [i]. *) val rindex_from : string -> int -> char -> int -(** [String.rindex_from s i c] returns the character number of the +(** [String.rindex_from s i c] returns the index of the last occurrence of character [c] in string [s] before position [i+1]. [String.rindex s c] is equivalent to [String.rindex_from s (String.length s - 1) c]. @@ -224,8 +242,11 @@ val compare: t -> t -> int (* The following is for system use only. Do not call directly. *) external unsafe_get : string -> int -> char = "%string_unsafe_get" -external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set" +external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" + [@@ocaml.deprecated] external unsafe_blit : - string -> int -> string -> int -> int -> unit = "caml_blit_string" "noalloc" + string -> int -> bytes -> int -> int -> unit + = "caml_blit_string" "noalloc" external unsafe_fill : - string -> int -> int -> char -> unit = "caml_fill_string" "noalloc" + bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc" + [@@ocaml.deprecated] diff --git a/stdlib/stringLabels.mli b/stdlib/stringLabels.mli index 8e2e6d379..1cf5d51ed 100644 --- a/stdlib/stringLabels.mli +++ b/stdlib/stringLabels.mli @@ -17,61 +17,71 @@ external length : string -> int = "%string_length" (** Return the length (number of characters) of the given string. *) external get : string -> int -> char = "%string_safe_get" -(** [String.get s n] returns character number [n] in string [s]. - The first character is character number 0. - The last character is character number [String.length s - 1]. +(** [String.get s n] returns the character at index [n] in string [s]. You can also write [s.[n]] instead of [String.get s n]. - Raise [Invalid_argument "index out of bounds"] - if [n] is outside the range 0 to [(String.length s - 1)]. *) + Raise [Invalid_argument] if [n] not a valid index in [s]. *) - -external set : string -> int -> char -> unit = "%string_safe_set" -(** [String.set s n c] modifies string [s] in place, - replacing the character number [n] by [c]. +external set : bytes -> int -> char -> unit = "%string_safe_set" + [@@ocaml.deprecated] +(** [String.set s n c] modifies byte sequence [s] in place, + replacing the byte at index [n] with [c]. You can also write [s.[n] <- c] instead of [String.set s n c]. - Raise [Invalid_argument "index out of bounds"] - if [n] is outside the range 0 to [(String.length s - 1)]. *) -external create : int -> string = "caml_create_string" -(** [String.create n] returns a fresh string of length [n]. - The string initially contains arbitrary characters. - Raise [Invalid_argument] if [n < 0] or [n > Sys.max_string_length]. -*) + Raise [Invalid_argument] if [n] is not a valid index in [s]. + + @deprecated This is a deprecated alias of {!Bytes.set}. *) + +external create : int -> bytes = "caml_create_string" [@@ocaml.deprecated] +(** [String.create n] returns a fresh byte sequence of length [n]. + The sequence is uninitialized and contains arbitrary bytes. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. + + @deprecated This is a deprecated alias of {!Bytes.create}. *) val make : int -> char -> string (** [String.make n c] returns a fresh string of length [n], filled with the character [c]. - Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}.*) + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) + +val init : int -> f:(int -> char) -> string +(** [init n f] returns a string of length [n], + with character [i] initialized to the result of [f i]. + + Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *) val copy : string -> string (** Return a copy of the given string. *) val sub : string -> pos:int -> len:int -> string (** [String.sub s start len] returns a fresh string of length [len], - containing the characters number [start] to [start + len - 1] - of string [s]. - Raise [Invalid_argument] if [start] and [len] do not - designate a valid substring of [s]; that is, if [start < 0], - or [len < 0], or [start + len > ]{!StringLabels.length}[ s]. *) + containing the substring of [s] that starts at position [start] and + has length [len]. -val fill : string -> pos:int -> len:int -> char -> unit -(** [String.fill s start len c] modifies string [s] in place, - replacing the characters number [start] to [start + len - 1] - by [c]. Raise [Invalid_argument] if [start] and [len] do not designate a valid substring of [s]. *) +val fill : bytes -> pos:int -> len:int -> char -> unit [@@ocaml.deprecated] +(** [String.fill s start len c] modifies byte sequence [s] in place, + replacing [len] bytes by [c], starting at [start]. + + Raise [Invalid_argument] if [start] and [len] do not + designate a valid substring of [s]. + + @deprecated This is a deprecated alias of {!Bytes.fill}. *) + val blit : - src:string -> src_pos:int -> dst:string -> dst_pos:int -> len:int -> unit -(** [String.blit src srcoff dst dstoff len] copies [len] characters - from string [src], starting at character number [srcoff], to - string [dst], starting at character number [dstoff]. It works - correctly even if [src] and [dst] are the same string, - and the source and destination chunks overlap. + src:string -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int + -> unit +(** [String.blit src srcoff dst dstoff len] copies [len] bytes + from the string [src], starting at index [srcoff], + to byte sequence [dst], starting at character number [dstoff]. + Raise [Invalid_argument] if [srcoff] and [len] do not - designate a valid substring of [src], or if [dstoff] and [len] - do not designate a valid substring of [dst]. *) + designate a valid range of [src], or if [dstoff] and [len] + do not designate a valid range of [dst]. *) val concat : sep:string -> string list -> string (** [String.concat sep sl] concatenates the list of strings [sl], @@ -86,8 +96,7 @@ val iteri : f:(int -> char -> unit) -> string -> unit (** Same as {!String.iter}, but the function is applied to the index of the element as first argument (counting from 0), and the character itself as second argument. - @since 4.00.0 -*) + @since 4.00.0 *) val map : f:(char -> char) -> string -> string (** [String.map f s] applies function [f] in turn to all @@ -95,11 +104,18 @@ val map : f:(char -> char) -> string -> string is returned. @since 4.00.0 *) +val mapi : f:(int -> char -> char) -> string -> string +(** [String.mapi f s] calls [f] with each character of [s] and its + index (in increasing index order) and stores the results in a new + string that is returned. + @since 4.02.0 *) + val trim : string -> string -(** Return a copy of the argument, without leading and trailing whitespace. - The characters regarded as whitespace are: [' '], ['\012'], ['\n'], - ['\r'], and ['\t']. If there is no whitespace character in the argument, - return the original string itself, not a copy. +(** Return a copy of the argument, without leading and trailing + whitespace. The characters regarded as whitespace are: [' '], + ['\012'], ['\n'], ['\r'], and ['\t']. If there is no leading nor + trailing whitespace character in the argument, return the original + string itself, not a copy. @since 4.00.0 *) val escaped : string -> string @@ -107,28 +123,36 @@ val escaped : string -> string represented by escape sequences, following the lexical conventions of OCaml. If there is no special character in the argument, return the original string itself, - not a copy. *) + not a copy. Its inverse function is Scanf.unescaped. *) val index : string -> char -> int -(** [String.index s c] returns the position of the leftmost +(** [String.index s c] returns the index of the first occurrence of character [c] in string [s]. + Raise [Not_found] if [c] does not occur in [s]. *) val rindex : string -> char -> int -(** [String.rindex s c] returns the position of the rightmost +(** [String.rindex s c] returns the index of the last occurrence of character [c] in string [s]. + Raise [Not_found] if [c] does not occur in [s]. *) val index_from : string -> int -> char -> int -(** Same as {!StringLabels.index}, but start - searching at the character position given as second argument. - [String.index s c] is equivalent to [String.index_from s 0 c].*) +(** [String.index_from s i c] returns the index of the + first occurrence of character [c] in string [s] after position [i]. + [String.index s c] is equivalent to [String.index_from s 0 c]. + + Raise [Invalid_argument] if [i] is not a valid position in [s]. + Raise [Not_found] if [c] does not occur in [s] after position [i]. *) val rindex_from : string -> int -> char -> int -(** Same as {!StringLabels.rindex}, but start - searching at the character position given as second argument. +(** [String.rindex_from s i c] returns the index of the + last occurrence of character [c] in string [s] before position [i+1]. [String.rindex s c] is equivalent to - [String.rindex_from s (String.length s - 1) c]. *) + [String.rindex_from s (String.length s - 1) c]. + + Raise [Invalid_argument] if [i+1] is not a valid position in [s]. + Raise [Not_found] if [c] does not occur in [s] before position [i+1]. *) val contains : string -> char -> bool (** [String.contains s c] tests if character [c] @@ -136,15 +160,18 @@ val contains : string -> char -> bool val contains_from : string -> int -> char -> bool (** [String.contains_from s start c] tests if character [c] - appears in the substring of [s] starting from [start] to the end - of [s]. - Raise [Invalid_argument] if [start] is not a valid index of [s]. *) + appears in [s] after position [start]. + [String.contains s c] is equivalent to + [String.contains_from s 0 c]. + + Raise [Invalid_argument] if [start] is not a valid position in [s]. *) val rcontains_from : string -> int -> char -> bool (** [String.rcontains_from s stop c] tests if character [c] - appears in the substring of [s] starting from the beginning - of [s] to index [stop]. - Raise [Invalid_argument] if [stop] is not a valid index of [s]. *) + appears in [s] before position [stop+1]. + + Raise [Invalid_argument] if [stop < 0] or [stop+1] is not a valid + position in [s]. *) val uppercase : string -> string (** Return a copy of the argument, with all lowercase letters @@ -176,9 +203,11 @@ val compare: t -> t -> int (* The following is for system use only. Do not call directly. *) external unsafe_get : string -> int -> char = "%string_unsafe_get" -external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set" +external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" + [@@ocaml.deprecated] external unsafe_blit : - src:string -> src_pos:int -> dst:string -> dst_pos:int -> len:int -> + src:string -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int -> unit = "caml_blit_string" "noalloc" external unsafe_fill : - string -> pos:int -> len:int -> char -> unit = "caml_fill_string" "noalloc" + bytes -> pos:int -> len:int -> char -> unit = "caml_fill_string" "noalloc" + [@@ocaml.deprecated] diff --git a/stdlib/sys.mli b/stdlib/sys.mli index 944b1090f..ae175c2e8 100644 --- a/stdlib/sys.mli +++ b/stdlib/sys.mli @@ -11,7 +11,12 @@ (* *) (***********************************************************************) -(** System interface. *) +(** System interface. + + Every function in this module raises [Sys_error] with an + informative message when the underlying system call signal + an error. +*) val argv : string array (** The command line arguments given to the process. @@ -99,7 +104,7 @@ val big_endian : bool @since 4.00.0 *) val max_string_length : int -(** Maximum length of a string. *) +(** Maximum length of strings and byte sequences. *) val max_array_length : int (** Maximum length of a normal array. The maximum length of a float diff --git a/stdlib/weak.ml b/stdlib/weak.ml index 536a42e04..8166142b6 100644 --- a/stdlib/weak.ml +++ b/stdlib/weak.ml @@ -80,8 +80,8 @@ module Make (H : Hashtbl.HashedType) : (S with type data = H.t) = struct let sz = if sz < 7 then 7 else sz in let sz = if sz > Sys.max_array_length then Sys.max_array_length else sz in { - table = Array.create sz emptybucket; - hashes = Array.create sz [| |]; + table = Array.make sz emptybucket; + hashes = Array.make sz [| |]; limit = limit; oversize = 0; rover = 0; |