diff options
author | Gabriel Scherer <gabriel.scherer@gmail.com> | 2014-05-12 15:37:37 +0000 |
---|---|---|
committer | Gabriel Scherer <gabriel.scherer@gmail.com> | 2014-05-12 15:37:37 +0000 |
commit | 72669307e837a103476f44eb6680caf424274f92 (patch) | |
tree | e04b94d3726361e39d5f86698178b14089e9d960 /stdlib | |
parent | 9fa17c95a5575341a9dea716f5393f7e5b6e6e51 (diff) |
second part of Benoît Vaugon's format+gadts patch
To finish the bootstrap cycle, run:
make library-cross
make promote
make partialclean
make core
make library-cross
make promote-cross
make partialclean
make ocamlc ocamllex ocamltools
make library-cross
make promote
make partialclean
make core
make compare
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14810 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'stdlib')
-rwxr-xr-x | stdlib/Compflags | 4 | ||||
-rw-r--r-- | stdlib/camlinternalFormat.ml | 15 | ||||
-rw-r--r-- | stdlib/format.ml | 455 | ||||
-rw-r--r-- | stdlib/pervasives.ml | 27 | ||||
-rw-r--r-- | stdlib/pervasives.mli | 4 | ||||
-rw-r--r-- | stdlib/printf.ml | 750 | ||||
-rw-r--r-- | stdlib/printf.mli | 73 | ||||
-rw-r--r-- | stdlib/scanf.ml | 1078 | ||||
-rw-r--r-- | stdlib/scanf.mli | 10 |
9 files changed, 614 insertions, 1802 deletions
diff --git a/stdlib/Compflags b/stdlib/Compflags index 1a66d1591..7c023d98b 100755 --- a/stdlib/Compflags +++ b/stdlib/Compflags @@ -19,8 +19,8 @@ case $1 in buffer.cmx|buffer.p.cmx) echo ' -inline 3';; # make sure add_char is inlined (PR#5872) buffer.cm[io]) echo ' -w A';; - camlinternalFormat.cm[io]) echo ' -w a';; - printf.cm[io]|format.cm[io]|scanf.cm[io]) echo ' -w A';; + camlinternalFormat.cm[io]) echo ' -w Ae';; + printf.cm[io]|format.cm[io]|scanf.cm[io]) echo ' -w Ae';; scanf.cmx|scanf.p.cmx) echo ' -inline 9';; *Labels.cm[ox]|*Labels.p.cmx) echo ' -nolabels -no-alias-deps';; *) echo ' ';; diff --git a/stdlib/camlinternalFormat.ml b/stdlib/camlinternalFormat.ml index c915e0329..1cdf856a2 100644 --- a/stdlib/camlinternalFormat.ml +++ b/stdlib/camlinternalFormat.ml @@ -976,9 +976,9 @@ fun k o acc fmt -> match fmt with (*fun _ -> make_printf k o (Acc_string (acc, string_of_fmtty fmtty)) rest*) fun (_, str) -> make_printf k o (Acc_string (acc, str)) rest | Format_subst (_, _, fmtty, rest) -> - (* Call to type_format can't failed (raise Type_mismatch). *) + (* Call to type_format can't fail (raise Type_mismatch). *) fun (fmt, _) -> make_printf k o acc - CamlinternalFormatBasics.(concat_fmt (type_format fmt fmtty) rest) + (concat_fmt (type_format fmt fmtty) rest) | Scan_char_set (_, _, rest) -> let new_acc = Acc_invalid_arg (acc, "Printf: bad conversion %[") in @@ -1215,12 +1215,11 @@ let rec strput_acc b acc = match acc with (* Error managment *) (* Raise a Failure with a pretty-printed error message. *) -(* Since it uses "compiled formats", it can't be implemented in bootstrap - mode. *) -let failwith_message _ = - failwith - "CamlinternalFormat failure \ - (error messages not implemented at bootstrap time)" +let failwith_message + ((fmt, _) : ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6) = + let buf = Buffer.create 256 in + let k () acc = strput_acc buf acc; failwith (Buffer.contents buf) in + make_printf k () End_of_acc fmt (******************************************************************************) (* Parsing tools *) diff --git a/stdlib/format.ml b/stdlib/format.ml index 18de7e24c..12754903e 100644 --- a/stdlib/format.ml +++ b/stdlib/format.ml @@ -29,6 +29,10 @@ external int_of_size : size -> int = "%identity" (* Tokens are one of the following : *) +type block_type + = CamlinternalFormatBasics.block_type + = Pp_hbox | Pp_vbox | Pp_hvbox | Pp_hovbox | Pp_box | Pp_fits + type pp_token = | Pp_text of string (* normal text *) | Pp_break of int * int (* complete break *) @@ -46,21 +50,7 @@ type pp_token = and tag = string -and block_type = -| Pp_hbox (* Horizontal block no line breaking *) -| Pp_vbox (* Vertical block each break leads to a new line *) -| Pp_hvbox (* Horizontal-vertical block: same as vbox, except if this block - is small enough to fit on a single line *) -| Pp_hovbox (* Horizontal or Vertical block: breaks lead to new line - only when necessary to print the content of the block *) -| Pp_box (* Horizontal or Indent block: breaks lead to new line - only when necessary to print the content of the block, or - when it leads to a new indentation of the current line *) -| Pp_fits (* Internal usage: when a block fits on a single line *) - -and tblock = - | Pp_tbox of int list ref (* Tabulation box *) -;; +and tblock = Pp_tbox of int list ref (* Tabulation box *) (* The Queue: contains all formatting elements. @@ -241,7 +231,8 @@ let pp_infinity = 1000000010;; let pp_output_string state s = state.pp_out_string s 0 (String.length s) and pp_output_newline state = state.pp_out_newline () and pp_output_spaces state n = state.pp_out_spaces n -;; + +let pp_output_char state c = pp_output_string state (String.make 1 c) (* To format a break, indenting a new line. *) let break_new_line state offset width = @@ -1069,309 +1060,71 @@ and set_tags = pp_set_tags std_formatter ;; - -(************************************************************** - - Printf implementation. - - **************************************************************) - -module Sformat = Printf.CamlinternalPr.Sformat;; -module Tformat = Printf.CamlinternalPr.Tformat;; - -(* Error messages when processing formats. *) - -(* Trailer: giving up at character number ... *) -let giving_up mess fmt i = - Printf.sprintf - "Format.fprintf: %s \'%s\', giving up at character number %d%s" - mess (Sformat.to_string fmt) i - (if i < Sformat.length fmt - then Printf.sprintf " (%c)." (Sformat.get fmt i) - else Printf.sprintf "%c" '.') -;; - -(* When an invalid format deserves a special error explanation. *) -let format_invalid_arg mess fmt i = invalid_arg (giving_up mess fmt i);; - -(* Standard invalid format. *) -let invalid_format fmt i = format_invalid_arg "bad format" fmt i;; - -(* Cannot find a valid integer into that format. *) -let invalid_integer fmt i = - invalid_arg (giving_up "bad integer specification" fmt i);; - -(* Finding an integer size out of a sub-string of the format. *) -let format_int_of_string fmt i s = - let sz = - try int_of_string s with - | Failure _ -> invalid_integer fmt i in - size_of_int sz -;; - -(* Getting strings out of buffers. *) -let get_buffer_out b = - let s = Buffer.contents b in - Buffer.reset b; - s -;; - -(* [ppf] is supposed to be a pretty-printer that outputs to buffer [b]: - to extract the contents of [ppf] as a string we flush [ppf] and get the - string out of [b]. *) -let string_out b ppf = - pp_flush_queue ppf false; - get_buffer_out b -;; - -(* Applies [printer] to a formatter that outputs on a fresh buffer, - then returns the resulting material. *) -let exstring printer arg = - let b = Buffer.create 512 in - let ppf = formatter_of_buffer b in - printer ppf arg; - string_out b ppf -;; - -(* To turn out a character accumulator into the proper string result. *) -let implode_rev s0 = function - | [] -> s0 - | l -> String.concat "" (List.rev (s0 :: l)) -;; - -(* [mkprintf] is the printf-like function generator: given the - - [to_s] flag that tells if we are printing into a string, - - the [get_out] function that has to be called to get a [ppf] function to - output onto, - it generates a [kprintf] function that takes as arguments a [k] - continuation function to be called at the end of formatting, - and a printing format string to print the rest of the arguments - according to the format string. - Regular [fprintf]-like functions of this module are obtained via partial - applications of [mkprintf]. *) -let mkprintf to_s get_out k fmt = - - (* [out] is global to this definition of [pr], and must be shared by all its - recursive calls (if any). *) - let out = get_out fmt in - let print_as = ref None in - let outc c = - match !print_as with - | None -> pp_print_char out c - | Some size -> - pp_print_as_size out size (String.make 1 c); - print_as := None - and outs s = - match !print_as with - | None -> pp_print_string out s - | Some size -> - pp_print_as_size out size s; - print_as := None - and flush out = pp_print_flush out () in - - let rec pr k n fmt v = - - let len = Sformat.length fmt in - - let rec doprn n i = - if i >= len then Obj.magic (k out) else - match Sformat.get fmt i with - | '%' -> - Tformat.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m - | '@' -> - let i = succ i in - if i >= len then invalid_format fmt i else - begin match Sformat.get fmt i with - | '[' -> - do_pp_open_box out n (succ i) - | ']' -> - pp_close_box out (); - doprn n (succ i) - | '{' -> - do_pp_open_tag out n (succ i) - | '}' -> - pp_close_tag out (); - doprn n (succ i) - | ' ' -> - pp_print_space out (); - doprn n (succ i) - | ',' -> - pp_print_cut out (); - doprn n (succ i) - | '?' -> - pp_print_flush out (); - doprn n (succ i) - | '.' -> - pp_print_newline out (); - doprn n (succ i) - | '\n' -> - pp_force_newline out (); - doprn n (succ i) - | ';' -> - do_pp_break out n (succ i) - | '<' -> - let got_size size n i = - print_as := Some size; - doprn n (skip_gt i) in - get_int n (succ i) got_size - | '@' -> - outc '@'; - doprn n (succ i) - | _ -> invalid_format fmt i - end - | c -> outc c; doprn n (succ i) - - and cont_s n s i = - outs s; doprn n i - and cont_a n printer arg i = - if to_s then - outs ((Obj.magic printer : unit -> _ -> string) () arg) - else - printer out arg; - doprn n i - and cont_t n printer i = - if to_s then - outs ((Obj.magic printer : unit -> string) ()) - else - printer out; - doprn n i - and cont_f n i = - flush out; doprn n i - and cont_m n xf i = - let m = - Sformat.add_int_index - (Tformat.count_printing_arguments_of_format xf) n in - pr (Obj.magic (fun _ -> doprn m i)) n xf v - - and get_int n i c = - if i >= len then invalid_integer fmt i else - match Sformat.get fmt i with - | ' ' -> get_int n (succ i) c - | '%' -> - let cont_s n s i = c (format_int_of_string fmt i s) n i - and cont_a _n _printer _arg i = invalid_integer fmt i - and cont_t _n _printer i = invalid_integer fmt i - and cont_f _n i = invalid_integer fmt i - and cont_m _n _sfmt i = invalid_integer fmt i in - Tformat.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m - | _ -> - let rec get j = - if j >= len then invalid_integer fmt j else - match Sformat.get fmt j with - | '0' .. '9' | '-' -> get (succ j) - | _ -> - let size = - if j = i then size_of_int 0 else - let s = Sformat.sub fmt (Sformat.index_of_int i) (j - i) in - format_int_of_string fmt j s in - c size n j in - get i - - and skip_gt i = - if i >= len then invalid_format fmt i else - match Sformat.get fmt i with - | ' ' -> skip_gt (succ i) - | '>' -> succ i - | _ -> invalid_format fmt i - - and get_box_kind i = - if i >= len then Pp_box, i else - match Sformat.get fmt i with - | 'h' -> - let i = succ i in - if i >= len then Pp_hbox, i else - begin match Sformat.get fmt i with - | 'o' -> - let i = succ i in - if i >= len then format_invalid_arg "bad box format" fmt i else - begin match Sformat.get fmt i with - | 'v' -> Pp_hovbox, succ i - | c -> - format_invalid_arg - ("bad box name ho" ^ String.make 1 c) fmt i - end - | 'v' -> Pp_hvbox, succ i - | _ -> Pp_hbox, i - end - | 'b' -> Pp_box, succ i - | 'v' -> Pp_vbox, succ i - | _ -> Pp_box, i - - and get_tag_name n i c = - let rec get accu n i j = - if j >= len then - c (implode_rev - (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) - accu) - n j else - match Sformat.get fmt j with - | '>' -> - c (implode_rev - (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) - accu) - n j - | '%' -> - let s0 = Sformat.sub fmt (Sformat.index_of_int i) (j - i) in - let cont_s n s i = get (s :: s0 :: accu) n i i - and cont_a n printer arg i = - let s = - if to_s - then (Obj.magic printer : unit -> _ -> string) () arg - else exstring printer arg in - get (s :: s0 :: accu) n i i - and cont_t n printer i = - let s = - if to_s - then (Obj.magic printer : unit -> string) () - else exstring (fun ppf () -> printer ppf) () in - get (s :: s0 :: accu) n i i - and cont_f _n i = - format_invalid_arg "bad tag name specification" fmt i - and cont_m _n _sfmt i = - format_invalid_arg "bad tag name specification" fmt i in - Tformat.scan_format fmt v n j cont_s cont_a cont_t cont_f cont_m - | _ -> get accu n i (succ j) in - get [] n i i - - and do_pp_break ppf n i = - if i >= len then begin pp_print_space ppf (); doprn n i end else - match Sformat.get fmt i with - | '<' -> - let rec got_nspaces nspaces n i = - get_int n i (got_offset nspaces) - and got_offset nspaces offset n i = - pp_print_break ppf (int_of_size nspaces) (int_of_size offset); - doprn n (skip_gt i) in - get_int n (succ i) got_nspaces - | _c -> pp_print_space ppf (); doprn n i - - and do_pp_open_box ppf n i = - if i >= len then begin pp_open_box_gen ppf 0 Pp_box; doprn n i end else - match Sformat.get fmt i with - | '<' -> - let kind, i = get_box_kind (succ i) in - let got_size size n i = - pp_open_box_gen ppf (int_of_size size) kind; - doprn n (skip_gt i) in - get_int n i got_size - | _c -> pp_open_box_gen ppf 0 Pp_box; doprn n i - - and do_pp_open_tag ppf n i = - if i >= len then begin pp_open_tag ppf ""; doprn n i end else - match Sformat.get fmt i with - | '<' -> - let got_name tag_name n i = - pp_open_tag ppf tag_name; - doprn n (skip_gt i) in - get_tag_name n (succ i) got_name - | _c -> pp_open_tag ppf ""; doprn n i in - - doprn n 0 in - - let kpr = pr k (Sformat.index_of_int 0) in - - Tformat.kapr kpr fmt -;; + (************************************************************** + + Defining continuations to be passed as arguments of + CamlinternalFormat.make_printf. + + **************************************************************) + +open CamlinternalFormatBasics +open CamlinternalFormat + +(* Interpret a formatting entity on a formatter. *) +let output_formatting ppf fmting = match fmting with + | Open_box (_, bty, indent) -> pp_open_box_gen ppf indent bty + | Close_box -> pp_close_box ppf () + | Open_tag (_, name) -> pp_open_tag ppf name + | Close_tag -> pp_close_tag ppf () + | Break (_, width, offset) -> pp_print_break ppf width offset + | FFlush -> pp_print_flush ppf () + | Force_newline -> pp_force_newline ppf () + | Flush_newline -> pp_print_newline ppf () + | Magic_size (_, _) -> () + | Escaped_at -> pp_output_char ppf '@' + | Escaped_percent -> pp_output_char ppf '%' + | Scan_indic c -> pp_output_char ppf '@'; pp_output_char ppf c + +(* Recursively output an "accumulator" containing a reversed list of + printing entities (string, char, flus, ...) in an output_stream. *) +(* Differ from Printf.output_acc by the interpretation of formatting. *) +(* Used as a continuation of CamlinternalFormat.make_printf. *) +let rec output_acc ppf acc = match acc with + | Acc_string (Acc_formatting (p, Magic_size (_, size)), s) -> + output_acc ppf p; + pp_print_as_size ppf (size_of_int size) s; + | Acc_char (Acc_formatting (p, Magic_size (_, size)), c) -> + output_acc ppf p; + pp_print_as_size ppf (size_of_int size) (String.make 1 c); + | Acc_formatting (p, f) -> output_acc ppf p; output_formatting ppf f; + | Acc_string (p, s) -> output_acc ppf p; pp_print_string ppf s; + | Acc_char (p, c) -> output_acc ppf p; pp_print_char ppf c; + | Acc_delay (p, f) -> output_acc ppf p; f ppf; + | Acc_flush p -> output_acc ppf p; pp_print_flush ppf (); + | Acc_invalid_arg (p, msg) -> output_acc ppf p; invalid_arg msg; + | End_of_acc -> () + +(* Recursively output an "accumulator" containing a reversed list of + printing entities (string, char, flus, ...) in a buffer. *) +(* Differ from Printf.bufput_acc by the interpretation of formatting. *) +(* Used as a continuation of CamlinternalFormat.make_printf. *) +let rec strput_acc ppf acc = match acc with + | Acc_string (Acc_formatting (p, Magic_size (_, size)), s) -> + strput_acc ppf p; + pp_print_as_size ppf (size_of_int size) s; + | Acc_char (Acc_formatting (p, Magic_size (_, size)), c) -> + strput_acc ppf p; + pp_print_as_size ppf (size_of_int size) (String.make 1 c); + | Acc_delay (Acc_formatting (p, Magic_size (_, size)), f) -> + strput_acc ppf p; + pp_print_as_size ppf (size_of_int size) (f ()); + | Acc_formatting (p, f) -> strput_acc ppf p; output_formatting ppf f; + | Acc_string (p, s) -> strput_acc ppf p; pp_print_string ppf s; + | Acc_char (p, c) -> strput_acc ppf p; pp_print_char ppf c; + | Acc_delay (p, f) -> strput_acc ppf p; pp_print_string ppf (f ()); + | Acc_flush p -> strput_acc ppf p; pp_print_flush ppf (); + | Acc_invalid_arg (p, msg) -> strput_acc ppf p; invalid_arg msg; + | End_of_acc -> () (************************************************************** @@ -1379,30 +1132,37 @@ let mkprintf to_s get_out k fmt = **************************************************************) -let kfprintf k ppf = mkprintf false (fun _ -> ppf) k;; -let ikfprintf k ppf = Tformat.kapr (fun _ _ -> Obj.magic (k ppf));; - -let fprintf ppf = kfprintf ignore ppf;; -let ifprintf ppf = ikfprintf ignore ppf;; -let printf fmt = fprintf std_formatter fmt;; -let eprintf fmt = fprintf err_formatter fmt;; - -let ksprintf k = +let kfprintf k o (fmt, _) = + make_printf (fun o acc -> output_acc o acc; k o) o End_of_acc fmt +let ikfprintf k x (fmt, _) = + make_printf (fun _ _ -> k x) x End_of_acc fmt + +let fprintf ppf fmt = kfprintf ignore ppf fmt +let ifprintf ppf fmt = ikfprintf ignore ppf fmt +let printf fmt = fprintf std_formatter fmt +let eprintf fmt = fprintf err_formatter fmt + +let ksprintf k (fmt, _) = + let k' () acc = + let b = Buffer.create 512 in + let ppf = formatter_of_buffer b in + strput_acc ppf acc; + pp_flush_queue ppf false; + k (Buffer.contents b) in + make_printf k' () End_of_acc fmt + +let sprintf fmt = + ksprintf (fun s -> s) fmt + +let asprintf (fmt, _) = let b = Buffer.create 512 in - let k ppf = k (string_out b ppf) in - let ppf = formatter_of_buffer b in - let get_out _ = ppf in - mkprintf true get_out k -;; - -let sprintf fmt = ksprintf (fun s -> s) fmt;; - -let asprintf fmt = - let b = Buffer.create 512 in - let k ppf = string_out b ppf in - let ppf = formatter_of_buffer b in - let get_out _ = ppf in - mkprintf false get_out k fmt;; + let ppf = formatter_of_buffer b in + let k' : (formatter -> (formatter, unit) acc -> string) + = fun ppf acc -> + output_acc ppf acc; + pp_flush_queue ppf false; + Buffer.contents b in + make_printf k' ppf End_of_acc fmt (************************************************************** @@ -1410,15 +1170,10 @@ let asprintf fmt = **************************************************************) -let kbprintf k b = - mkprintf false (fun _ -> formatter_of_buffer b) k -;; - (* Deprecated error prone function bprintf. *) -let bprintf b = - let k ppf = pp_flush_queue ppf false in - kbprintf k b -;; +let bprintf b ((fmt, _) : ('a, formatter, unit) format) = + let k ppf acc = output_acc ppf acc; pp_flush_queue ppf false in + make_printf k (formatter_of_buffer b) End_of_acc fmt (* Deprecated alias for ksprintf. *) let kprintf = ksprintf;; diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml index 6f7e27792..c629229d7 100644 --- a/stdlib/pervasives.ml +++ b/stdlib/pervasives.ml @@ -976,34 +976,21 @@ fun ign fmt -> match ign with Param_format_EBB (Scan_char_set (width_opt, char_set, fmt)) end -(*type ('a, 'b, 'c, 'd, 'e, 'f) format6 = - ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6*) +type ('a, 'b, 'c, 'd, 'e, 'f) format6 = + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 -(* Aliases of format6 with restricted parameters. *) -(* Usefull for Printf and Format functions. *) type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6 + type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4 +let string_of_format (fmt, str) = str + external format_of_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" -external format_to_string : - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string = "%identity" -external string_to_format : - string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" - -let (( ^^ ) : - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - ('f, 'b, 'c, 'e, 'g, 'h) format6 -> - ('a, 'b, 'c, 'd, 'g, 'h) format6) = - fun fmt1 fmt2 -> - string_to_format (format_to_string fmt1 ^ "%," ^ format_to_string fmt2) -;; - -(* Have to return a copy for compatibility with unsafe-string mode *) -(* String.copy is not available here, so use ^ to make a copy of the string *) -let string_of_format fmt = format_to_string fmt ^ "" +let (^^) (fmt1, str1) (fmt2, str2) = + (CamlinternalFormatBasics.concat_fmt fmt1 fmt2, str1 ^ str2) (* Miscellaneous *) diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli index 7755af816..be0e95847 100644 --- a/stdlib/pervasives.mli +++ b/stdlib/pervasives.mli @@ -1248,8 +1248,8 @@ end receiver function. *) -(*type ('a, 'b, 'c, 'd, 'e, 'f) format6 = - ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6*) +type ('a, 'b, 'c, 'd, 'e, 'f) format6 = + ('a, 'b, 'c, 'd, 'e, 'f) CamlinternalFormatBasics.format6 type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6 diff --git a/stdlib/printf.ml b/stdlib/printf.ml index 54052e820..6423e2285 100644 --- a/stdlib/printf.ml +++ b/stdlib/printf.ml @@ -11,728 +11,28 @@ (* *) (***********************************************************************) -external format_float: string -> float -> string - = "caml_format_float" -external format_int: string -> int -> string - = "caml_format_int" -external format_int32: string -> int32 -> string - = "caml_int32_format" -external format_nativeint: string -> nativeint -> string - = "caml_nativeint_format" -external format_int64: string -> int64 -> string - = "caml_int64_format" - -module Sformat = struct - - type index;; - - external unsafe_index_of_int : int -> index = "%identity" - ;; - let index_of_int i = - if i >= 0 then unsafe_index_of_int i - else failwith ("Sformat.index_of_int: negative argument " ^ string_of_int i) - ;; - external int_of_index : index -> int = "%identity" - ;; - - let add_int_index i idx = index_of_int (i + int_of_index idx);; - let succ_index = add_int_index 1;; - (* Literal position are one-based (hence pred p instead of p). *) - let index_of_literal_position p = index_of_int (pred p);; - - external length : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int - = "%string_length" - ;; - external get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_safe_get" - ;; - external unsafe_get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_unsafe_get" - ;; - external unsafe_to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string - = "%identity" - ;; - let sub fmt idx len = - String.sub (unsafe_to_string fmt) (int_of_index idx) len - ;; - let to_string fmt = sub fmt (unsafe_index_of_int 0) (length fmt) - ;; - -end -;; - -let bad_conversion sfmt i c = - invalid_arg - ("Printf: bad conversion %" ^ String.make 1 c ^ ", at char number " ^ - string_of_int i ^ " in format string \'" ^ sfmt ^ "\'") -;; - -let bad_conversion_format fmt i c = - bad_conversion (Sformat.to_string fmt) i c -;; - -let incomplete_format fmt = - invalid_arg - ("Printf: premature end of format string \'" ^ - Sformat.to_string fmt ^ "\'") -;; - -(* Parses a string conversion to return the specified length and the - padding direction. *) -let parse_string_conversion sfmt = - let rec parse neg i = - if i >= String.length sfmt then (0, neg) else - match String.unsafe_get sfmt i with - | '1'..'9' -> - (int_of_string - (String.sub sfmt i (String.length sfmt - i - 1)), - neg) - | '-' -> - parse true (succ i) - | _ -> - parse neg (succ i) in - try parse false 1 with - | Failure _ -> bad_conversion sfmt 0 's' -;; - -(* Pad a (sub) string into a blank string of length [p], - on the right if [neg] is true, on the left otherwise. *) -let pad_string pad_char p neg s i len = - if p = len && i = 0 then s else - if p <= len then String.sub s i len else - let res = Bytes.make p pad_char in - if neg - then String.blit s i res 0 len - else String.blit s i res (p - len) len; - Bytes.unsafe_to_string res -;; - -(* Format a string given a %s format, e.g. %40s or %-20s. - To do ?: ignore other flags (#, +, etc). *) -let format_string sfmt s = - let (p, neg) = parse_string_conversion sfmt in - pad_string ' ' p neg s 0 (String.length s) -;; - -(* Extract a format string out of [fmt] between [start] and [stop] inclusive. - ['*'] in the format are replaced by integers taken from the [widths] list. - [extract_format] returns a string which is the string representation of - the resulting format string. *) -let extract_format fmt start stop widths = - let skip_positional_spec start = - match Sformat.unsafe_get fmt start with - | '0'..'9' -> - let rec skip_int_literal i = - match Sformat.unsafe_get fmt i with - | '0'..'9' -> skip_int_literal (succ i) - | '$' -> succ i - | _ -> start in - skip_int_literal (succ start) - | _ -> start in - let start = skip_positional_spec (succ start) in - let b = Buffer.create (stop - start + 10) in - Buffer.add_char b '%'; - let rec fill_format i widths = - if i <= stop then - match (Sformat.unsafe_get fmt i, widths) with - | ('*', h :: t) -> - Buffer.add_string b (string_of_int h); - let i = skip_positional_spec (succ i) in - fill_format i t - | ('*', []) -> - assert false (* Should not happen since this is ill-typed. *) - | (c, _) -> - Buffer.add_char b c; - fill_format (succ i) widths in - fill_format start (List.rev widths); - Buffer.contents b -;; - -let extract_format_int conv fmt start stop widths = - let sfmt = extract_format fmt start stop widths in - match conv with - | 'n' | 'N' -> - let len = String.length sfmt in - String.sub sfmt 0 (len - 1) ^ "u" - | _ -> sfmt -;; - -let extract_format_float conv fmt start stop widths = - let sfmt = extract_format fmt start stop widths in - match conv with - | 'F' -> - let len = String.length sfmt in - String.sub sfmt 0 (len - 1) ^ "g" - | _ -> sfmt -;; - -(* Returns the position of the next character following the meta format - string, starting from position [i], inside a given format [fmt]. - According to the character [conv], the meta format string is - enclosed by the delimiters %{ and %} (when [conv = '{']) or %( and - %) (when [conv = '(']). Hence, [sub_format] returns the index of - the character following the [')'] or ['}'] that ends the meta format, - according to the character [conv]. *) -let sub_format incomplete_format bad_conversion_format conv fmt i = - let len = Sformat.length fmt in - let rec sub_fmt c i = - let close = if c = '(' then ')' else (* '{' *) '}' in - let rec sub j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | '%' -> sub_sub (succ j) - | _ -> sub (succ j) - and sub_sub j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | '(' | '{' as c -> - let j = sub_fmt c (succ j) in - sub (succ j) - | '}' | ')' as c -> - if c = close then succ j else bad_conversion_format fmt i c - | _ -> sub (succ j) in - sub i in - sub_fmt conv i -;; - -let sub_format_for_printf conv = - sub_format incomplete_format bad_conversion_format conv -;; - -let iter_on_format_args fmt add_conv add_char = - - let lim = Sformat.length fmt - 1 in - - let rec scan_flags skip i = - if i > lim then incomplete_format fmt else - match Sformat.unsafe_get fmt i with - | '*' -> scan_flags skip (add_conv skip i 'i') - (* | '$' -> scan_flags skip (succ i) *** PR#4321 *) - | '#' | '-' | ' ' | '+' -> scan_flags skip (succ i) - | '_' -> scan_flags true (succ i) - | '0'..'9' - | '.' -> scan_flags skip (succ i) - | _ -> scan_conv skip i - and scan_conv skip i = - if i > lim then incomplete_format fmt else - match Sformat.unsafe_get fmt i with - | '%' | '@' | '!' | ',' -> succ i - | 's' | 'S' | '[' -> add_conv skip i 's' - | 'c' | 'C' -> add_conv skip i 'c' - | 'd' | 'i' |'o' | 'u' | 'x' | 'X' | 'N' -> add_conv skip i 'i' - | 'f' | 'e' | 'E' | 'g' | 'G' | 'F' -> add_conv skip i 'f' - | 'B' | 'b' -> add_conv skip i 'B' - | 'a' | 'r' | 't' as conv -> add_conv skip i conv - | 'l' | 'n' | 'L' as conv -> - let j = succ i in - if j > lim then add_conv skip i 'i' else begin - match Sformat.get fmt j with - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' -> - add_char (add_conv skip i conv) 'i' - | _ -> add_conv skip i 'i' end - | '{' as conv -> - (* Just get a regular argument, skipping the specification. *) - let i = add_conv skip i conv in - (* To go on, find the index of the next char after the meta format. *) - let j = sub_format_for_printf conv fmt i in - (* Add the meta specification to the summary anyway. *) - let rec loop i = - if i < j - 2 then loop (add_char i (Sformat.get fmt i)) in - loop i; - (* Go on, starting at the closing brace to properly close the meta - specification in the summary. *) - scan_conv skip (j - 1) - | '(' as conv -> - (* Use the static format argument specification instead of - the runtime format argument value: they must have the same type - anyway. *) - scan_fmt (add_conv skip i conv) - | '}' | ')' as conv -> add_conv skip i conv - | conv -> bad_conversion_format fmt i conv - - and scan_fmt i = - if i < lim then - if Sformat.get fmt i = '%' - then scan_fmt (scan_flags false (succ i)) - else scan_fmt (succ i) - else i in - - ignore (scan_fmt 0) -;; - -(* Returns a string that summarizes the typing information that a given - format string contains. - For instance, [summarize_format_type "A number %d\n"] is "%i". - It also checks the well-formedness of the format string. *) -let summarize_format_type fmt = - let len = Sformat.length fmt in - let b = Buffer.create len in - let add_char i c = Buffer.add_char b c; succ i in - let add_conv skip i c = - if skip then Buffer.add_string b "%_" else Buffer.add_char b '%'; - add_char i c in - iter_on_format_args fmt add_conv add_char; - Buffer.contents b -;; - -module Ac = struct - type ac = { - mutable ac_rglr : int; - mutable ac_skip : int; - mutable ac_rdrs : int; - } -end -;; - -open Ac;; - -(* Computes the number of arguments of a format (including the flag - arguments if any). *) -let ac_of_format fmt = - let ac = { ac_rglr = 0; ac_skip = 0; ac_rdrs = 0; } in - let incr_ac skip c = - let inc = if c = 'a' then 2 else 1 in - if c = 'r' then ac.ac_rdrs <- ac.ac_rdrs + 1; - if skip - then ac.ac_skip <- ac.ac_skip + inc - else ac.ac_rglr <- ac.ac_rglr + inc in - let add_conv skip i c = - (* Just finishing a meta format: no additional argument to record. *) - if c <> ')' && c <> '}' then incr_ac skip c; - succ i - and add_char i _ = succ i in - - iter_on_format_args fmt add_conv add_char; - ac -;; - -let count_printing_arguments_of_format fmt = - let ac = ac_of_format fmt in - (* For printing, only the regular arguments have to be counted. *) - ac.ac_rglr -;; - -let list_iter_i f l = - let rec loop i = function - | [] -> () - | [x] -> f i x (* Tail calling [f] *) - | x :: xs -> f i x; loop (succ i) xs in - loop 0 l -;; - -(* 'Abstracting' version of kprintf: returns a (curried) function that - will print when totally applied. - Note: in the following, we are careful not to be badly caught - by the compiler optimizations for the representation of arrays. *) -let kapr kpr fmt = - match count_printing_arguments_of_format fmt with - | 0 -> kpr fmt [||] - | 1 -> Obj.magic (fun x -> - let a = Array.make 1 (Obj.repr 0) in - a.(0) <- x; - kpr fmt a) - | 2 -> Obj.magic (fun x y -> - let a = Array.make 2 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; - kpr fmt a) - | 3 -> Obj.magic (fun x y z -> - let a = Array.make 3 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - kpr fmt a) - | 4 -> Obj.magic (fun x y z t -> - let a = Array.make 4 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - a.(3) <- t; - kpr fmt a) - | 5 -> Obj.magic (fun x y z t u -> - let a = Array.make 5 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - a.(3) <- t; a.(4) <- u; - kpr fmt a) - | 6 -> Obj.magic (fun x y z t u v -> - let a = Array.make 6 (Obj.repr 0) in - a.(0) <- x; a.(1) <- y; a.(2) <- z; - a.(3) <- t; a.(4) <- u; a.(5) <- v; - kpr fmt a) - | nargs -> - let rec loop i args = - if i >= nargs then - let a = Array.make nargs (Obj.repr 0) in - list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args; - kpr fmt a - else Obj.magic (fun x -> loop (succ i) (x :: args)) in - loop 0 [] -;; - -type positional_specification = - | Spec_none | Spec_index of Sformat.index -;; - -(* To scan an optional positional parameter specification, - i.e. an integer followed by a [$]. - - Calling [got_spec] with appropriate arguments, we 'return' a positional - specification and an index to go on scanning the [fmt] format at hand. - - Note that this is optimized for the regular case, i.e. no positional - parameter, since in this case we juste 'return' the constant - [Spec_none]; in case we have a positional parameter, we 'return' a - [Spec_index] [positional_specification] which is a bit more costly. - - Note also that we do not support [*$] specifications, since this would - lead to type checking problems: a [*$] positional specification means - 'take the next argument to [printf] (which must be an integer value)', - name this integer value $n$; [*$] now designates parameter $n$. - - Unfortunately, the type of a parameter specified via a [*$] positional - specification should be the type of the corresponding argument to - [printf], hence this should be the type of the $n$-th argument to [printf] - with $n$ being the {\em value} of the integer argument defining [*]; we - clearly cannot statically guess the value of this parameter in the general - case. Put it another way: this means type dependency, which is completely - out of scope of the OCaml type algebra. *) - -let scan_positional_spec fmt got_spec i = - match Sformat.unsafe_get fmt i with - | '0'..'9' as d -> - let rec get_int_literal accu j = - match Sformat.unsafe_get fmt j with - | '0'..'9' as d -> - get_int_literal (10 * accu + (int_of_char d - 48)) (succ j) - | '$' -> - if accu = 0 then - failwith "printf: bad positional specification (0)." else - got_spec (Spec_index (Sformat.index_of_literal_position accu)) (succ j) - (* Not a positional specification: tell so the caller, and go back to - scanning the format from the original [i] position we were called at - first. *) - | _ -> got_spec Spec_none i in - get_int_literal (int_of_char d - 48) (succ i) - (* No positional specification: tell so the caller, and go back to scanning - the format from the original [i] position. *) - | _ -> got_spec Spec_none i -;; - -(* Get the index of the next argument to printf, according to the given - positional specification. *) -let next_index spec n = - match spec with - | Spec_none -> Sformat.succ_index n - | Spec_index _ -> n -;; - -(* Get the index of the actual argument to printf, according to its - optional positional specification. *) -let get_index spec n = - match spec with - | Spec_none -> n - | Spec_index p -> p -;; - -(* Format a float argument as a valid OCaml lexeme. *) -let format_float_lexeme = - - (* To be revised: this procedure should be a unique loop that performs the - validity check and the string lexeme modification at the same time. - Otherwise, it is too difficult to handle the strange padding facilities - given by printf. Let alone handling the correct widths indication, - knowing that we have sometime to add a '.' at the end of the result! - *) - - let make_valid_float_lexeme s = - (* Check if s is already a valid lexeme: - in this case do nothing, - otherwise turn s into a valid OCaml lexeme. *) - let l = String.length s in - let rec valid_float_loop i = - if i >= l then s ^ "." else - match s.[i] with - (* Sure, this is already a valid float lexeme. *) - | '.' | 'e' | 'E' -> s - | _ -> valid_float_loop (i + 1) in - - valid_float_loop 0 in - - (fun sfmt x -> - match classify_float x with - | FP_normal | FP_subnormal | FP_zero -> - make_valid_float_lexeme (format_float sfmt x) - | FP_infinite -> - if x < 0.0 then "neg_infinity" else "infinity" - | FP_nan -> - "nan") -;; - -(* Decode a format string and act on it. - [fmt] is the [printf] format string, and [pos] points to a [%] character in - the format string. - After consuming the appropriate number of arguments and formatting - them, one of the following five continuations described below is called: - - - [cont_s] for outputting a string - (arguments: arg num, string, next pos) - - [cont_a] for performing a %a action - (arguments: arg num, fn, arg, next pos) - - [cont_t] for performing a %t action - (arguments: arg num, fn, next pos) - - [cont_f] for performing a flush action - (arguments: arg num, next pos) - - [cont_m] for performing a %( action - (arguments: arg num, sfmt, next pos) - - "arg num" is the index in array [args] of the next argument to [printf]. - "next pos" is the position in [fmt] of the first character following - the %conversion specification in [fmt]. *) - -(* Note: here, rather than test explicitly against [Sformat.length fmt] - to detect the end of the format, we use [Sformat.unsafe_get] and - rely on the fact that we'll get a "null" character if we access - one past the end of the string. These "null" characters are then - caught by the [_ -> bad_conversion] clauses below. - Don't do this at home, kids. *) -let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = - - let get_arg spec n = - Obj.magic (args.(Sformat.int_of_index (get_index spec n))) in - - let rec scan_positional n widths i = - let got_spec spec i = scan_flags spec n widths i in - scan_positional_spec fmt got_spec i - - and scan_flags spec n widths i = - match Sformat.unsafe_get fmt i with - | '*' -> - let got_spec wspec i = - let (width : int) = get_arg wspec n in - scan_flags spec (next_index wspec n) (width :: widths) i in - scan_positional_spec fmt got_spec (succ i) - | '0'..'9' - | '.' | '#' | '-' | ' ' | '+' -> scan_flags spec n widths (succ i) - | _ -> scan_conv spec n widths i - - and scan_conv spec n widths i = - match Sformat.unsafe_get fmt i with - | '%' | '@' as c -> - cont_s n (String.make 1 c) (succ i) - | '!' -> cont_f n (succ i) - | ',' -> cont_s n "" (succ i) - | 's' | 'S' as conv -> - let (x : string) = get_arg spec n in - let x = if conv = 's' then x else "\"" ^ String.escaped x ^ "\"" in - let s = - (* Optimize for common case %s *) - if i = succ pos then x else - format_string (extract_format fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | '[' as conv -> - bad_conversion_format fmt i conv - | 'c' | 'C' as conv -> - let (x : char) = get_arg spec n in - let s = - if conv = 'c' then String.make 1 x else "'" ^ Char.escaped x ^ "'" in - cont_s (next_index spec n) s (succ i) - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' | 'N' as conv -> - let (x : int) = get_arg spec n in - let s = - format_int (extract_format_int conv fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | 'f' | 'e' | 'E' | 'g' | 'G' -> - let (x : float) = get_arg spec n in - let s = format_float (extract_format fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | 'F' as conv -> - let (x : float) = get_arg spec n in - let s = - format_float_lexeme - (if widths = [] - then "%.12g" - else extract_format_float conv fmt pos i widths) - x in - cont_s (next_index spec n) s (succ i) - | 'B' | 'b' -> - let (x : bool) = get_arg spec n in - cont_s (next_index spec n) (string_of_bool x) (succ i) - | 'a' -> - let printer = get_arg spec n in - (* If the printer spec is Spec_none, go on as usual. - If the printer spec is Spec_index p, - printer's argument spec is Spec_index (succ_index p). *) - let n = Sformat.succ_index (get_index spec n) in - let arg = get_arg Spec_none n in - cont_a (next_index spec n) printer arg (succ i) - | 'r' as conv -> - bad_conversion_format fmt i conv - | 't' -> - let printer = get_arg spec n in - cont_t (next_index spec n) printer (succ i) - | 'l' | 'n' | 'L' as conv -> - begin match Sformat.unsafe_get fmt (succ i) with - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' -> - let i = succ i in - let s = - match conv with - | 'l' -> - let (x : int32) = get_arg spec n in - format_int32 (extract_format fmt pos i widths) x - | 'n' -> - let (x : nativeint) = get_arg spec n in - format_nativeint (extract_format fmt pos i widths) x - | _ -> - let (x : int64) = get_arg spec n in - format_int64 (extract_format fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - | _ -> - let (x : int) = get_arg spec n in - let s = format_int (extract_format_int 'n' fmt pos i widths) x in - cont_s (next_index spec n) s (succ i) - end - | '{' | '(' as conv (* ')' '}' *) -> - let (xf : ('a, 'b, 'c, 'd, 'e, 'f) format6) = get_arg spec n in - let i = succ i in - let i = sub_format_for_printf conv fmt i in - if conv = '{' (* '}' *) then - (* Just print the format argument as a specification. *) - cont_s - (next_index spec n) - (summarize_format_type xf) - i else - (* Use the format argument instead of the format specification. *) - cont_m (next_index spec n) xf i - | (* '(' *) ')' -> - cont_s n "" (succ i) - | conv -> - bad_conversion_format fmt i conv in - - scan_positional n [] (succ pos) -;; - -let mkprintf to_s get_out outc outs flush k fmt = - - (* [out] is global to this definition of [pr], and must be shared by all its - recursive calls (if any). *) - let out = get_out fmt in - let outc c = outc out c in - let outs s = outs out s in - - let rec pr k n fmt v = - - let len = Sformat.length fmt in - - let rec doprn n i = - if i >= len then Obj.magic (k out) else - match Sformat.unsafe_get fmt i with - | '%' -> scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m - | c -> outc c; doprn n (succ i) - - and cont_s n s i = - outs s; doprn n i - and cont_a n printer arg i = - if to_s then - outs ((Obj.magic printer : unit -> _ -> string) () arg) - else - printer out arg; - doprn n i - and cont_t n printer i = - if to_s then - outs ((Obj.magic printer : unit -> string) ()) - else - printer out; - doprn n i - and cont_f n i = - flush out; doprn n i - and cont_m n xf i = - let m = - Sformat.add_int_index - (count_printing_arguments_of_format xf) n in - pr (Obj.magic (fun _ -> doprn m i)) n xf v in - - doprn n 0 in - - let kpr = pr k (Sformat.index_of_int 0) in - - kapr kpr fmt -;; - -(************************************************************** - - Defining [fprintf] and various flavors of [fprintf]. - - **************************************************************) - -let kfprintf k oc = - mkprintf false (fun _ -> oc) output_char output_string flush k -;; -let ikfprintf k oc = kapr (fun _ _ -> Obj.magic (k oc));; - -let fprintf oc = kfprintf ignore oc;; -let ifprintf oc = ikfprintf ignore oc;; -let printf fmt = fprintf stdout fmt;; -let eprintf fmt = fprintf stderr fmt;; - -let kbprintf k b = - mkprintf false (fun _ -> b) Buffer.add_char Buffer.add_string ignore k -;; -let bprintf b = kbprintf ignore b;; - -let get_buff fmt = - let len = 2 * Sformat.length fmt in - Buffer.create len -;; - -let get_contents b = - let s = Buffer.contents b in - Buffer.clear b; - s -;; - -let get_cont k b = k (get_contents b);; - -let ksprintf k = - mkprintf true get_buff Buffer.add_char Buffer.add_string ignore (get_cont k) -;; - -let sprintf fmt = ksprintf (fun s -> s) fmt;; - -(************************************************************** - - Deprecated stuff. - - **************************************************************) - -let kprintf = ksprintf;; - -(* For OCaml system internal use only: needed to implement modules [Format] - and [Scanf]. *) - -module CamlinternalPr = struct - - module Sformat = Sformat;; - - module Tformat = struct - - type ac = - Ac.ac = { - mutable ac_rglr : int; - mutable ac_skip : int; - mutable ac_rdrs : int; - } - ;; - - let ac_of_format = ac_of_format;; - - let count_printing_arguments_of_format = - count_printing_arguments_of_format;; - - let sub_format = sub_format;; - - let summarize_format_type = summarize_format_type;; - - let scan_format = scan_format;; - - let kapr = kapr;; - - end - ;; - -end -;; +open CamlinternalFormat + +let kfprintf k o (fmt, _) = + make_printf (fun o acc -> output_acc o acc; k o) o End_of_acc fmt +let kbprintf k b (fmt, _) = + make_printf (fun b acc -> bufput_acc b acc; k b) b End_of_acc fmt +let ikfprintf k oc (fmt, _) = + make_printf (fun oc _ -> k oc) oc End_of_acc fmt + +let fprintf oc fmt = kfprintf ignore oc fmt +let bprintf b fmt = kbprintf ignore b fmt +let ifprintf oc fmt = ikfprintf ignore oc fmt +let printf fmt = fprintf stdout fmt +let eprintf fmt = fprintf stderr fmt + +let ksprintf k (fmt, _) = + let k' () acc = + let buf = Buffer.create 64 in + strput_acc buf acc; + k (Buffer.contents buf) in + make_printf k' () End_of_acc fmt + +let sprintf fmt = ksprintf (fun s -> s) fmt + +let kprintf = ksprintf diff --git a/stdlib/printf.mli b/stdlib/printf.mli index a75a64181..21e28159a 100644 --- a/stdlib/printf.mli +++ b/stdlib/printf.mli @@ -163,76 +163,5 @@ val kbprintf : (Buffer.t -> 'a) -> Buffer.t -> (** Deprecated *) -val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;; +val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b (** A deprecated synonym for [ksprintf]. *) - -(**/**) - -(* The following is for system use only. Do not call directly. *) - -module CamlinternalPr : sig - - module Sformat : sig - type index;; - - val index_of_int : int -> index;; - external int_of_index : index -> int = "%identity";; - external unsafe_index_of_int : int -> index = "%identity";; - - val succ_index : index -> index;; - val add_int_index : int -> index -> index;; - - val sub : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> index -> int -> string;; - val to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string;; - external length : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int - = "%string_length";; - external get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_safe_get";; - external unsafe_to_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string - = "%identity";; - external unsafe_get : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char - = "%string_unsafe_get";; - - end;; - - module Tformat : sig - - type ac = { - mutable ac_rglr : int; - mutable ac_skip : int; - mutable ac_rdrs : int; - };; - - val ac_of_format : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> ac;; - val count_printing_arguments_of_format : - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> int;; - - val sub_format : - (('a, 'b, 'c, 'd, 'e, 'f) format6 -> int) -> - (('a, 'b, 'c, 'd, 'e, 'f) format6 -> int -> char -> int) -> - char -> - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - int -> - int - - val summarize_format_type : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string - - val scan_format : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - 'g array -> - Sformat.index -> - int -> - (Sformat.index -> string -> int -> 'h) -> - (Sformat.index -> 'i -> 'j -> int -> 'h) -> - (Sformat.index -> 'k -> int -> 'h) -> - (Sformat.index -> int -> 'h) -> - (Sformat.index -> ('l, 'm, 'n, 'o, 'p, 'q) format6 -> int -> 'h) -> - 'h - - val kapr : - (('a, 'b, 'c, 'd, 'e, 'f) format6 -> Obj.t array -> 'g) -> - ('a, 'b, 'c, 'd, 'e, 'f) format6 -> - 'g - - end;; - -end;; diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 69837c470..f4e97a048 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -11,6 +11,19 @@ (* *) (***********************************************************************) +open CamlinternalFormatBasics +open CamlinternalFormat + +(* alias to avoid warning for ambiguity between + Pervasives.format6 + and CamlinternalFormatBasics.format6 + + (the former is in fact an alias for the latter, + but the ambiguity warning doesn't care) +*) +type ('a, 'b, 'c, 'd, 'e, 'f) format6 = + ('a, 'b, 'c, 'd, 'e, 'f) Pervasives.format6 + (* The run-time library for scanners. *) (* Scanning buffers. *) @@ -402,11 +415,6 @@ end type ('a, 'b, 'c, 'd) scanner = ('a, Scanning.in_channel, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c -;; - -external string_to_format : - string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" -;; (* Reporting errors. *) exception Scan_failure of string;; @@ -429,33 +437,6 @@ let bad_end_of_input message = (Printf.sprintf "scanning of %s failed: \ premature end of file occurred before end of token" message) -;; - -let int_of_width_opt = function - | None -> max_int - | Some width -> width -;; - -let int_of_prec_opt = function - | None -> max_int - | Some prec -> prec -;; - -module Sformat = Printf.CamlinternalPr.Sformat;; -module Tformat = Printf.CamlinternalPr.Tformat;; - -let bad_conversion fmt i c = - invalid_arg - (Printf.sprintf - "scanf: bad conversion %%%C, at char number %i \ - in format string \'%s\'" c i (Sformat.to_string fmt)) -;; - -let incomplete_format fmt = - invalid_arg - (Printf.sprintf "scanf: premature end of format string \'%s\'" - (Sformat.to_string fmt)) -;; let bad_float () = bad_input "no dot or exponent part found in float token" @@ -467,19 +448,15 @@ let character_mismatch_err c ci = let character_mismatch c ci = bad_input (character_mismatch_err c ci) -;; - -let format_mismatch_err fmt1 fmt2 = - Printf.sprintf - "format read \'%s\' does not match specification \'%s\'" fmt1 fmt2 -;; - -let format_mismatch fmt1 fmt2 = bad_input (format_mismatch_err fmt1 fmt2);; -(* Checking that 2 format strings are type compatible. *) -let compatible_format_type fmt1 fmt2 = - Tformat.summarize_format_type (string_to_format fmt1) = - Tformat.summarize_format_type (string_to_format fmt2);; +let rec skip_whites ib = + let c = Scanning.peek_char ib in + if not (Scanning.eof ib) then begin + match c with + | ' ' | '\t' | '\n' | '\r' -> + Scanning.invalidate_current_char ib; skip_whites ib + | _ -> () + end (* Checking that [c] is indeed in the input, then skips it. In this case, the character [c] has been explicitly specified in the @@ -496,28 +473,13 @@ let compatible_format_type fmt1 fmt2 = We are also careful to treat "\r\n" in the input as an end of line marker: it always matches a '\n' specification in the input format string. *) let rec check_char ib c = - let ci = Scanning.checked_peek_char ib in - if ci = c then Scanning.invalidate_current_char ib else begin - match ci with - | '\r' when c = '\n' -> - Scanning.invalidate_current_char ib; check_char ib '\n' - | _ -> character_mismatch c ci - end -;; - -(* Checks that the current char is indeed one of the stopper characters, - then skips it. - Be careful that if ib has no more character this procedure should - just do nothing (since %s@c defaults to the entire rest of the - buffer, when no character c can be found in the input). *) -let ignore_stoppers stps ib = - if stps <> [] && not (Scanning.eof ib) then - let ci = Scanning.peek_char ib in - if List.memq ci stps then Scanning.invalidate_current_char ib else - let sr = String.concat "" (List.map (String.make 1) stps) in - bad_input - (Printf.sprintf "looking for one of range %S, found %C" sr ci) -;; + if c = ' ' then skip_whites ib else + let ci = Scanning.checked_peek_char ib in + if ci = c then Scanning.invalidate_current_char ib else + match ci with + | '\r' when c = '\n' -> + Scanning.invalidate_current_char ib; check_char ib '\n' + | _ -> character_mismatch c ci (* Extracting tokens from the output token buffer. *) @@ -701,7 +663,7 @@ let scan_optionally_signed_int width ib = scan_unsigned_int width ib ;; -let scan_int_conv conv width _prec ib = +let scan_int_conv conv width ib = match conv with | 'b' -> scan_binary_int width ib | 'd' -> scan_optionally_signed_decimal_int width ib @@ -791,7 +753,7 @@ let scan_float width precision ib = scan_exp_part width ib, precision ;; -let scan_Float width precision ib = +let scan_caml_float width precision ib = let width = scan_optionally_signed_decimal_int width ib in if width = 0 then bad_float () else let c = Scanning.peek_char ib in @@ -805,12 +767,11 @@ let scan_Float width precision ib = | 'e' | 'E' -> scan_exp_part width ib | _ -> bad_float () -;; (* Scan a regular string: stops when encountering a space, if no scanning indication has been given; - otherwise, stops when encountering one of the characters in the scanning - indication list [stp]. + otherwise, stops when encountering the characters in the scanning + indication [stp]. It also stops at end of file or when the maximum number of characters has been read.*) let scan_string stp width ib = @@ -818,12 +779,14 @@ let scan_string stp width ib = if width = 0 then width else let c = Scanning.peek_char ib in if Scanning.eof ib then width else - if stp = [] then - match c with - | ' ' | '\t' | '\n' | '\r' -> width - | c -> loop (Scanning.store_char width ib c) else - if List.memq c stp then Scanning.skip_char width ib else - loop (Scanning.store_char width ib c) in + match stp with + | Some c' when c = c' -> Scanning.skip_char width ib + | Some _ -> loop (Scanning.store_char width ib c) + | None -> + match c with + | ' ' | '\t' | '\n' | '\r' -> width + | _ -> loop (Scanning.store_char width ib c) + in loop width ;; @@ -925,7 +888,7 @@ let scan_backslash_char width ib = ;; (* Scan a character (an OCaml token). *) -let scan_Char width ib = +let scan_caml_char width ib = let rec find_start width = match Scanning.checked_peek_char ib with @@ -948,7 +911,7 @@ let scan_Char width ib = ;; (* Scan a delimited string (an OCaml token). *) -let scan_String width ib = +let scan_caml_string width ib = let rec find_start width = match Scanning.checked_peek_char ib with @@ -981,8 +944,7 @@ let scan_String width ib = ;; (* Scan a boolean (an OCaml token). *) -let scan_bool width ib = - if width < 4 then bad_token_length "a boolean" else +let scan_bool ib = let c = Scanning.checked_peek_char ib in let m = match c with @@ -991,560 +953,430 @@ let scan_bool width ib = | c -> bad_input (Printf.sprintf "the character %C cannot start a boolean" c) in - scan_string [] (min width m) ib -;; - -(* Reading char sets in %[...] conversions. *) -type char_set = - | Pos_set of string (* Positive (regular) set. *) - | Neg_set of string (* Negative (complementary) set. *) -;; - - -(* Char sets are read as sub-strings in the format string. *) -let scan_range fmt j = - - let len = Sformat.length fmt in - - let buffer = Buffer.create len in - - let rec scan_closing j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | ']' -> j, Buffer.contents buffer - | '%' -> - let j = j + 1 in - if j >= len then incomplete_format fmt else - begin match Sformat.get fmt j with - | '%' | '@' as c -> - Buffer.add_char buffer c; - scan_closing (j + 1) - | c -> bad_conversion fmt j c - end - | c -> - Buffer.add_char buffer c; - scan_closing (j + 1) in - - let scan_first_pos j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | ']' as c -> - Buffer.add_char buffer c; - scan_closing (j + 1) - | _ -> scan_closing j in - - let scan_first_neg j = - if j >= len then incomplete_format fmt else - match Sformat.get fmt j with - | '^' -> - let j = j + 1 in - let k, char_set = scan_first_pos j in - k, Neg_set char_set - | _ -> - let k, char_set = scan_first_pos j in - k, Pos_set char_set in - - scan_first_neg j -;; - -(* Char sets are now represented as bit vectors that are represented as - byte strings. *) - -(* Bit manipulations into bytes. *) -let set_bit_of_byte byte idx b = - (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx))) -;; - -let get_bit_of_byte byte idx = (byte lsr idx) land 1;; - -(* Bit manipulations in vectors of bytes represented as strings. *) -let set_bit_of_range r c b = - let idx = c land 0x7 in - let ydx = c lsr 3 in - let byte = Bytes.get r ydx in - Bytes.set r ydx (char_of_int (set_bit_of_byte (int_of_char byte) idx b)) -;; - -let get_bit_of_range r c = - let idx = c land 0x7 in - let ydx = c lsr 3 in - let byte = Bytes.get r ydx in - get_bit_of_byte (int_of_char byte) idx -;; - -(* Char sets represented as bit vectors represented as fixed length byte - strings. *) -(* Create a full or empty set of chars. *) -let make_range bit = - let c = char_of_int (if bit = 0 then 0 else 0xFF) in - Bytes.make 32 c -;; - -(* Test if a char belongs to a set of chars. *) -let get_char_in_range r c = get_bit_of_range r (int_of_char c);; - -let bit_not b = (lnot b) land 1;; - -(* Build the bit vector corresponding to the set of characters - that belongs to the string argument [set]. - (In the [Scanf] module [set] is always a sub-string of the format.) *) -let make_char_bit_vect bit set = - let r = make_range (bit_not bit) in - let lim = String.length set - 1 in - let rec loop bit rp i = - if i <= lim then - match set.[i] with - | '-' when rp -> - (* if i = 0 then rp is false (since the initial call is - loop bit false 0). Hence i >= 1 and the following is safe. *) - let c1 = set.[i - 1] in - let i = succ i in - if i > lim then loop bit false (i - 1) else - let c2 = set.[i] in - for j = int_of_char c1 to int_of_char c2 do - set_bit_of_range r j bit done; - loop bit false (succ i) - | _ -> - set_bit_of_range r (int_of_char set.[i]) bit; - loop bit true (succ i) in - loop bit false 0; - r -;; - -(* Compute the predicate on chars corresponding to a char set. *) -let make_predicate bit set stp = - let r = make_char_bit_vect bit set in - List.iter - (fun c -> set_bit_of_range r (int_of_char c) (bit_not bit)) stp; - (fun c -> get_char_in_range r c) -;; + scan_string None m ib -let make_setp stp char_set = - match char_set with - | Pos_set set -> - begin match String.length set with - | 0 -> (fun _ -> 0) - | 1 -> - let p = set.[0] in - (fun c -> if c == p then 1 else 0) - | 2 -> - let p1 = set.[0] and p2 = set.[1] in - (fun c -> if c == p1 || c == p2 then 1 else 0) - | 3 -> - let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in - if p2 = '-' then make_predicate 1 set stp else - (fun c -> if c == p1 || c == p2 || c == p3 then 1 else 0) - | _ -> make_predicate 1 set stp - end - | Neg_set set -> - begin match String.length set with - | 0 -> (fun _ -> 1) - | 1 -> - let p = set.[0] in - (fun c -> if c != p then 1 else 0) - | 2 -> - let p1 = set.[0] and p2 = set.[1] in - (fun c -> if c != p1 && c != p2 then 1 else 0) - | 3 -> - let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in - if p2 = '-' then make_predicate 0 set stp else - (fun c -> if c != p1 && c != p2 && c != p3 then 1 else 0) - | _ -> make_predicate 0 set stp - end -;; - -let setp_table = Hashtbl.create 7;; - -let add_setp stp char_set setp = - let char_set_tbl = - try Hashtbl.find setp_table char_set with - | Not_found -> - let char_set_tbl = Hashtbl.create 3 in - Hashtbl.add setp_table char_set char_set_tbl; - char_set_tbl in - Hashtbl.add char_set_tbl stp setp -;; - -let find_setp stp char_set = - try Hashtbl.find (Hashtbl.find setp_table char_set) stp with - | Not_found -> - let setp = make_setp stp char_set in - add_setp stp char_set setp; - setp -;; - -let scan_chars_in_char_set stp char_set width ib = - let rec loop_pos1 cp1 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c == cp1 - then loop_pos1 cp1 (Scanning.store_char width ib c) - else width - and loop_pos2 cp1 cp2 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c == cp1 || c == cp2 - then loop_pos2 cp1 cp2 (Scanning.store_char width ib c) - else width - and loop_pos3 cp1 cp2 cp3 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c == cp1 || c == cp2 || c == cp3 - then loop_pos3 cp1 cp2 cp3 (Scanning.store_char width ib c) - else width - and loop_neg1 cp1 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c != cp1 - then loop_neg1 cp1 (Scanning.store_char width ib c) - else width - and loop_neg2 cp1 cp2 width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c != cp1 && c != cp2 - then loop_neg2 cp1 cp2 (Scanning.store_char width ib c) - else width - and loop_neg3 cp1 cp2 cp3 width = - if width = 0 then width else +(* Scan a string containing elements in char_set and terminated by scan_indic + if provided. *) +let scan_chars_in_char_set char_set scan_indic width ib = + let rec scan_chars i stp = let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if c != cp1 && c != cp2 && c != cp3 - then loop_neg3 cp1 cp2 cp3 (Scanning.store_char width ib c) - else width - and loop setp width = - if width = 0 then width else - let c = Scanning.peek_char ib in - if Scanning.eof ib then width else - if setp c == 1 - then loop setp (Scanning.store_char width ib c) - else width in - - let width = - match char_set with - | Pos_set set -> - begin match String.length set with - | 0 -> loop (fun _ -> 0) width - | 1 -> loop_pos1 set.[0] width - | 2 -> loop_pos2 set.[0] set.[1] width - | 3 when set.[1] != '-' -> loop_pos3 set.[0] set.[1] set.[2] width - | _ -> loop (find_setp stp char_set) width end - | Neg_set set -> - begin match String.length set with - | 0 -> loop (fun _ -> 1) width - | 1 -> loop_neg1 set.[0] width - | 2 -> loop_neg2 set.[0] set.[1] width - | 3 when set.[1] != '-' -> loop_neg3 set.[0] set.[1] set.[2] width - | _ -> loop (find_setp stp char_set) width end in - ignore_stoppers stp ib; - width -;; - -let get_count t ib = - match t with - | 'l' -> Scanning.line_count ib - | 'n' -> Scanning.char_count ib - | _ -> Scanning.token_count ib -;; - -let rec skip_whites ib = - let c = Scanning.peek_char ib in - if not (Scanning.eof ib) then begin - match c with - | ' ' | '\t' | '\n' | '\r' -> - Scanning.invalidate_current_char ib; skip_whites ib - | _ -> () - end -;; + if i > 0 && not (Scanning.eof ib) && is_in_char_set char_set c && + int_of_char c <> stp then + let _ = Scanning.store_char max_int ib c in + scan_chars (i - 1) stp; + in + match scan_indic with + | None -> scan_chars width (-1); + | Some c -> + scan_chars width (int_of_char c); + if not (Scanning.eof ib) then + let ci = Scanning.peek_char ib in + if c = ci then Scanning.invalidate_current_char ib + else character_mismatch c ci (* The global error report function for [Scanf]. *) let scanf_bad_input ib = function | Scan_failure s | Failure s -> let i = Scanning.char_count ib in - bad_input (Printf.sprintf "scanf: bad input at char number %i: \'%s\'" i s) + bad_input (Printf.sprintf "scanf: bad input at char number %i: %S" i s) | x -> raise x -;; - -let list_iter_i f l = - let rec loop i = function - | [] -> () - | [x] -> f i x (* Tail calling [f] *) - | x :: xs -> f i x; loop (succ i) xs in - loop 0 l -;; - -let ascanf sc fmt = - let ac = Tformat.ac_of_format fmt in - match ac.Tformat.ac_rdrs with - | 0 -> - Obj.magic (fun f -> sc fmt [||] f) - | 1 -> - Obj.magic (fun x f -> sc fmt [| Obj.repr x |] f) - | 2 -> - Obj.magic (fun x y f -> sc fmt [| Obj.repr x; Obj.repr y; |] f) - | 3 -> - Obj.magic - (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f) - | nargs -> - let rec loop i args = - if i >= nargs then - let a = Array.make nargs (Obj.repr 0) in - list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args; - Obj.magic (fun f -> sc fmt a f) - else Obj.magic (fun x -> loop (succ i) (x :: args)) in - loop 0 [] -;; - -(* The [scan_format] main scanning function. - It takes as arguments: - - an input buffer [ib] from which to read characters, - - an error handling function [ef], - - a format [fmt] that specifies what to read in the input, - - a vector of user's defined readers [rv], - - and a function [f] to pass the tokens read to. - - Then [scan_format] scans the format and the input buffer in parallel to - find out tokens as specified by the format; when it finds one token, it - converts it as specified, remembers the converted value as a future - argument to the function [f], and continues scanning. - - If the entire scanning succeeds (i.e. the format string has been - exhausted and the buffer has provided tokens according to the - format string), [f] is applied to the tokens read. - - If the scanning or some conversion fails, the main scanning function - aborts and applies the scanning buffer and a string that explains - the error to the error handling function [ef] (the error continuation). *) - -let scan_format ib ef fmt rv f = - - let limr = Array.length rv - 1 in - - let return v = Obj.magic v () in - let delay f x () = f x in - let stack f = delay (return f) in - let no_stack f _x = f in - - let rec scan fmt = - - let lim = Sformat.length fmt - 1 in - - let rec scan_fmt ir f i = - if i > lim then ir, f else - match Sformat.unsafe_get fmt i with - | '%' -> scan_skip ir f (succ i) - | ' ' -> skip_whites ib; scan_fmt ir f (succ i) - | c -> check_char ib c; scan_fmt ir f (succ i) - - and scan_skip ir f i = - if i > lim then ir, f else - match Sformat.get fmt i with - | '_' -> scan_limits true ir f (succ i) - | _ -> scan_limits false ir f i - - and scan_limits skip ir f i = - - let rec scan_width i = - if i > lim then incomplete_format fmt else - match Sformat.get fmt i with - | '0' .. '9' as conv -> - let width, i = - read_int_literal (decimal_value_of_char conv) (succ i) in - Some width, i - | _ -> None, i - - and scan_precision i = - begin - match Sformat.get fmt i with - | '.' -> - let precision, i = read_int_literal 0 (succ i) in - (Some precision, i) - | _ -> None, i - end - and read_int_literal accu i = - if i > lim then accu, i else - match Sformat.unsafe_get fmt i with - | '0' .. '9' as c -> - let accu = 10 * accu + decimal_value_of_char c in - read_int_literal accu (succ i) - | _ -> accu, i in - - if i > lim then ir, f else - let width_opt, i = scan_width i in - let prec_opt, i = scan_precision i in - scan_conversion skip width_opt prec_opt ir f i - - and scan_conversion skip width_opt prec_opt ir f i = - let stack = if skip then no_stack else stack in - let width = int_of_width_opt width_opt in - let prec = int_of_prec_opt prec_opt in - match Sformat.get fmt i with - | '%' | '@' as c -> - check_char ib c; - scan_fmt ir f (succ i) - | '!' -> - if not (Scanning.end_of_input ib) - then bad_input "end of input not found" else - scan_fmt ir f (succ i) - | ',' -> - scan_fmt ir f (succ i) - | 's' -> - let i, stp = scan_indication (succ i) in - let _x = scan_string stp width ib in - scan_fmt ir (stack f (token_string ib)) (succ i) - | 'S' -> - let _x = scan_String width ib in - scan_fmt ir (stack f (token_string ib)) (succ i) - | '[' (* ']' *) -> - let i, char_set = scan_range fmt (succ i) in - let i, stp = scan_indication (succ i) in - let _x = scan_chars_in_char_set stp char_set width ib in - scan_fmt ir (stack f (token_string ib)) (succ i) - | ('c' | 'C') when width = 0 -> - let c = Scanning.checked_peek_char ib in - scan_fmt ir (stack f c) (succ i) - | 'c' -> - let _x = scan_char width ib in - scan_fmt ir (stack f (token_char ib)) (succ i) - | 'C' -> - let _x = scan_Char width ib in - scan_fmt ir (stack f (token_char ib)) (succ i) - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv -> - let _x = scan_int_conv conv width prec ib in - scan_fmt ir (stack f (token_int conv ib)) (succ i) - | 'N' as conv -> - scan_fmt ir (stack f (get_count conv ib)) (succ i) - | 'f' | 'e' | 'E' | 'g' | 'G' -> - let _x = scan_float width prec ib in - scan_fmt ir (stack f (token_float ib)) (succ i) - | 'F' -> - let _x = scan_Float width prec ib in - scan_fmt ir (stack f (token_float ib)) (succ i) -(* | 'B' | 'b' when width = Some 0 -> - let _x = scan_bool width ib in - scan_fmt ir (stack f (token_int ib)) (succ i) *) - | 'B' | 'b' -> - let _x = scan_bool width ib in - scan_fmt ir (stack f (token_bool ib)) (succ i) - | 'r' -> - if ir > limr then assert false else - let token = Obj.magic rv.(ir) ib in - scan_fmt (succ ir) (stack f token) (succ i) - | 'l' | 'n' | 'L' as conv0 -> - let i = succ i in - if i > lim then scan_fmt ir (stack f (get_count conv0 ib)) i else begin - match Sformat.get fmt i with - (* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *) - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv1 -> - let _x = scan_int_conv conv1 width prec ib in - (* Look back to the character that triggered the integer conversion - (this character is either 'l', 'n' or 'L') to find the - conversion to apply to the integer token read. *) - begin match conv0 with - | 'l' -> scan_fmt ir (stack f (token_int32 conv1 ib)) (succ i) - | 'n' -> scan_fmt ir (stack f (token_nativeint conv1 ib)) (succ i) - | _ -> scan_fmt ir (stack f (token_int64 conv1 ib)) (succ i) end - (* This is not an integer conversion, but a regular %l, %n or %L. *) - | _ -> scan_fmt ir (stack f (get_count conv0 ib)) i end - | '(' | '{' as conv (* ')' '}' *) -> - let i = succ i in - (* Find [mf], the static specification for the format to read. *) - let j = - Tformat.sub_format - incomplete_format bad_conversion conv fmt i in - let mf = Sformat.sub fmt (Sformat.index_of_int i) (j - 2 - i) in - (* Read [rf], the specified format string in the input buffer, - and check its correctness w.r.t. [mf]. *) - let _x = scan_String width ib in - let rf = token_string ib in - if not (compatible_format_type rf mf) then format_mismatch rf mf else - (* Proceed according to the kind of metaformat found: - - %{ mf %} simply returns [rf] as the token read, - - %( mf %) returns [rf] as the first token read, then - returns a second token obtained by scanning the input with - format string [rf]. - Behaviour for %( mf %) is mandatory for sake of format string - typechecking specification. To get pure format string - substitution behaviour, you should use %_( mf %) that skips the - first (format string) token and hence properly substitutes [mf] by - [rf] in the format string argument. - *) - (* For conversion %{%}, just return this format string as the token - read and go on with the rest of the format string argument. *) - if conv = '{' (* '}' *) then scan_fmt ir (stack f rf) j else - (* Or else, return this format string as the first token read; - then continue scanning using this format string to get - the following token read; - finally go on with the rest of the format string argument. *) - let ir, nf = scan (string_to_format rf) ir (stack f rf) 0 in - (* Return the format string read and the value just read, - then go on with the rest of the format. *) - scan_fmt ir nf j - - | c -> bad_conversion fmt i c - - and scan_indication j = - if j > lim then j - 1, [] else - match Sformat.get fmt j with - | '@' -> - let k = j + 1 in - if k > lim then j - 1, [] else - begin match Sformat.get fmt k with - | '%' -> - let k = k + 1 in - if k > lim then j - 1, [] else - begin match Sformat.get fmt k with - | '%' | '@' as c -> k, [ c ] - | _c -> j - 1, [] - end - | c -> k, [ c ] - end - | _c -> j - 1, [] in - - scan_fmt in - - - Scanning.reset_token ib; - - let v = - try snd (scan fmt 0 (fun () -> f) 0) with - | (Scan_failure _ | Failure _ | End_of_file) as exc -> - stack (delay ef ib) exc in - return v -;; - -let mkscanf ib ef fmt = - let sc = scan_format ib ef in - ascanf sc fmt -;; +(* Get the content of a counter from an input buffer. *) +let get_counter ib counter = match counter with + | Line_counter -> Scanning.line_count ib + | Char_counter -> Scanning.char_count ib + | Token_counter -> Scanning.token_count ib -let kscanf ib ef fmt = mkscanf ib ef fmt;; - -let bscanf ib = kscanf ib scanf_bad_input;; - -let fscanf ic = bscanf (Scanning.from_channel ic);; - -let sscanf : string -> ('a, 'b, 'c, 'd) scanner - = fun s -> bscanf (Scanning.from_string s);; - -let scanf fmt = bscanf Scanning.stdib fmt;; +(* Compute the width of a padding option (see "%42{" and "%123("). *) +let width_of_pad_opt pad_opt = match pad_opt with + | None -> max_int + | Some width -> width -let bscanf_format ib fmt f = - let fmt = Sformat.unsafe_to_string fmt in - let fmt1 = - ignore (scan_String max_int ib); - token_string ib in - if not (compatible_format_type fmt1 fmt) then - format_mismatch fmt1 fmt else - f (string_to_format fmt1) -;; +let stopper_of_formatting fmting = + if fmting = Escaped_percent then '%', "" else + let str = string_of_formatting fmting in + let stp = str.[1] in + let sub_str = String.sub str 2 (String.length str - 2) in + stp, sub_str + +(******************************************************************************) + (* Readers managment *) + +(* A call to take_format_readers on a format is evaluated into functions + taking readers as arguments and aggregate them into an heterogeneous list *) +(* When all readers are taken, finally pass the list of the readers to the + continuation k. *) +let rec take_format_readers : type a c d e f . + ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, e, f) fmt -> + d = +fun k fmt -> match fmt with + | Reader fmt_rest -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_format_readers new_k fmt_rest + | Char rest -> take_format_readers k rest + | Caml_char rest -> take_format_readers k rest + | String (_, rest) -> take_format_readers k rest + | Caml_string (_, rest) -> take_format_readers k rest + | Int (_, _, _, rest) -> take_format_readers k rest + | Int32 (_, _, _, rest) -> take_format_readers k rest + | Nativeint (_, _, _, rest) -> take_format_readers k rest + | Int64 (_, _, _, rest) -> take_format_readers k rest + | Float (_, _, _, rest) -> take_format_readers k rest + | Bool rest -> take_format_readers k rest + | Alpha rest -> take_format_readers k rest + | Theta rest -> take_format_readers k rest + | Flush rest -> take_format_readers k rest + | String_literal (_, rest) -> take_format_readers k rest + | Char_literal (_, rest) -> take_format_readers k rest + + | Scan_char_set (_, _, rest) -> take_format_readers k rest + | Scan_get_counter (_, rest) -> take_format_readers k rest + + | Formatting (_, rest) -> take_format_readers k rest + + | Format_arg (_, _, rest) -> take_format_readers k rest + | Format_subst (_, _, fmtty, rest) -> take_fmtty_format_readers k fmtty rest + | Ignored_param (ign, rest) -> take_ignored_format_readers k ign rest + + | End_of_format -> k Nil + +(* Take readers associated to an fmtty coming from a Format_subst "%(...%)". *) +and take_fmtty_format_readers : type x y a c d e f . + ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) fmtty -> + (y, Scanning.in_channel, c, x, e, f) fmt -> d = +fun k fmtty fmt -> match fmtty with + | Reader_ty fmt_rest -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_fmtty_format_readers new_k fmt_rest fmt + | Ignored_reader_ty fmt_rest -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_fmtty_format_readers new_k fmt_rest fmt + | Char_ty rest -> take_fmtty_format_readers k rest fmt + | String_ty rest -> take_fmtty_format_readers k rest fmt + | Int_ty rest -> take_fmtty_format_readers k rest fmt + | Int32_ty rest -> take_fmtty_format_readers k rest fmt + | Nativeint_ty rest -> take_fmtty_format_readers k rest fmt + | Int64_ty rest -> take_fmtty_format_readers k rest fmt + | Float_ty rest -> take_fmtty_format_readers k rest fmt + | Bool_ty rest -> take_fmtty_format_readers k rest fmt + | Alpha_ty rest -> take_fmtty_format_readers k rest fmt + | Theta_ty rest -> take_fmtty_format_readers k rest fmt + | Format_arg_ty (_, rest) -> take_fmtty_format_readers k rest fmt + | End_of_fmtty -> take_format_readers k fmt + | Format_subst_ty (_, ty, rest) -> + take_fmtty_format_readers k (concat_fmtty ty rest) fmt + +(* Take readers associated to an ignored parameter. *) +and take_ignored_format_readers : type x y a c d e f . + ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) ignored -> + (y, Scanning.in_channel, c, x, e, f) fmt -> d = +fun k ign fmt -> match ign with + | Ignored_reader -> + fun reader -> + let new_k readers_rest = k (Cons (reader, readers_rest)) in + take_format_readers new_k fmt + | Ignored_char -> take_format_readers k fmt + | Ignored_caml_char -> take_format_readers k fmt + | Ignored_string _ -> take_format_readers k fmt + | Ignored_caml_string _ -> take_format_readers k fmt + | Ignored_int (_, _) -> take_format_readers k fmt + | Ignored_int32 (_, _) -> take_format_readers k fmt + | Ignored_nativeint (_, _) -> take_format_readers k fmt + | Ignored_int64 (_, _) -> take_format_readers k fmt + | Ignored_float (_, _) -> take_format_readers k fmt + | Ignored_bool -> take_format_readers k fmt + | Ignored_format_arg _ -> take_format_readers k fmt + | Ignored_format_subst (_, fmtty) -> take_fmtty_format_readers k fmtty fmt + | Ignored_scan_char_set _ -> take_format_readers k fmt + +(******************************************************************************) + (* Scanf "%(...%)" tools *) + +(* Type used to cross and substract reader_nb_unifer. *) +(* Used to interface make_format_subst_rnus and convert_fmtty_on_reader_nb. *) +type (_, _, _, _, _, _, _) format_subst_rnus = Format_subst_rnus : + ('d3, 'q3, 'd2, 'q2) reader_nb_unifier * + ('d1, 'q1, 'd3, 'q3) reader_nb_unifier * + ('q1, 'e1, 'q3, 'e3) reader_nb_unifier -> + ('d1, 'q1, 'e1, 'd2, 'q2, 'd3, 'e3) format_subst_rnus + +(* Cross and substract reader_nb_unifers. *) +(* Used when formats contains encapsulated "%(...%)" like "%(..%(..%)..%)". *) +(* See (convert_fmtty_on_reader_nb _ "%(...%)"). *) +let rec make_format_subst_rnus : type d1 q1 e1 d2 q2 d3 e3 . + (d1, e1, d3, e3) reader_nb_unifier -> (d1, q1, d2, q2) reader_nb_unifier -> + (d1, q1, e1, d2, q2, d3, e3) format_subst_rnus = +fun rnu sub_rnu -> match rnu, sub_rnu with + | Succ_reader rnu_rest, Succ_reader sub_rnu_rest -> + let Format_subst_rnus (sub_rnu', sub_fmtty_rnu, rest_rnu) = + make_format_subst_rnus rnu_rest sub_rnu_rest in + Format_subst_rnus(Succ_reader sub_rnu', Succ_reader sub_fmtty_rnu, rest_rnu) + | _, Zero_reader -> + Format_subst_rnus (Zero_reader, Zero_reader, rnu) + | Zero_reader, Succ_reader _ -> + (* Impossible! By hypothesis: rnu > sub_rnu. *) + assert false + +(* Use a reader_nb_unifier to transform 'd and 'e parameters of an fmtty. *) +(* See make_scanf "%(...%)". *) +let rec convert_fmtty_on_reader_nb : type a b c d1 d2 e1 e2 f . + (d1, e1, d2, e2) reader_nb_unifier -> (a, b, c, d1, e1, f) fmtty -> + (a, b, c, d2, e2, f) fmtty = +fun rnu fmtty -> match rnu, fmtty with + | _, Char_ty rest -> Char_ty (convert_fmtty_on_reader_nb rnu rest) + | _, String_ty rest -> String_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Int_ty rest -> Int_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Int32_ty rest -> Int32_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Nativeint_ty rest -> Nativeint_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Int64_ty rest -> Int64_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Float_ty rest -> Float_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Bool_ty rest -> Bool_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Alpha_ty rest -> Alpha_ty (convert_fmtty_on_reader_nb rnu rest) + | _, Theta_ty rest -> Theta_ty (convert_fmtty_on_reader_nb rnu rest) + + | Succ_reader rnu_rest, Reader_ty fmtty_rest -> + Reader_ty (convert_fmtty_on_reader_nb rnu_rest fmtty_rest) + | Succ_reader rnu_rest, Ignored_reader_ty fmtty_rest -> + Ignored_reader_ty (convert_fmtty_on_reader_nb rnu_rest fmtty_rest) + + | _, Format_arg_ty (sub_fmtty, rest) -> + Format_arg_ty (sub_fmtty, convert_fmtty_on_reader_nb rnu rest) + | _, Format_subst_ty (sub_rnu, sub_fmtty, rest) -> + let Format_subst_rnus (sub_rnu', sub_fmtty_rnu, rest_rnu) = + make_format_subst_rnus rnu sub_rnu in + let sub_fmtty' = convert_fmtty_on_reader_nb sub_fmtty_rnu sub_fmtty in + let rest' = convert_fmtty_on_reader_nb rest_rnu rest in + Format_subst_ty (sub_rnu', sub_fmtty', rest') + + | Zero_reader, End_of_fmtty -> End_of_fmtty + + | Zero_reader, Reader_ty _ -> + (* Impossible, by typing constraints on fmtty and rnu constructors: *) + (* rnu = Zero_reader => d1 == e1 *) + (* fmtty = Reader_ty _ => d1 <> e1 *) + assert false + | Zero_reader, Ignored_reader_ty _ -> + assert false (* Similar. *) + | Succ_reader _, End_of_fmtty -> + assert false (* Similar. *) + +(******************************************************************************) + (* Generic scanning *) + +(* Make a generic scanning function. *) +(* Scan a stream according to a format and readers obtained by + take_format_readers, and aggegate scanned values into an + heterogeneous list. *) +(* Return the heterogeneous list of scanned values. *) +let rec make_scanf : type a c d e f . + Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt -> + (d, _) heter_list -> (a, f) heter_list = +fun ib fmt readers -> match fmt with + | Char rest -> + let _ = scan_char 0 ib in + let c = token_char ib in + Cons (c, make_scanf ib rest readers) + | Caml_char rest -> + let _ = scan_caml_char 0 ib in + let c = token_char ib in + Cons (c, make_scanf ib rest readers) + + | String (pad, Formatting (fmting, rest)) -> + let stp, str = stopper_of_formatting fmting in + let scan width _ ib = scan_string (Some stp) width ib in + let str_rest = String_literal (str, rest) in + pad_prec_scanf ib str_rest readers pad No_precision scan token_string + | String (pad, rest) -> + let scan width _ ib = scan_string None width ib in + pad_prec_scanf ib rest readers pad No_precision scan token_string + + | Caml_string (pad, rest) -> + let scan width _ ib = scan_caml_string width ib in + pad_prec_scanf ib rest readers pad No_precision scan token_string + | Int (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_int c) + | Int32 (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_int32 c) + | Nativeint (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_nativeint c) + | Int64 (iconv, pad, prec, rest) -> + let c = char_of_iconv iconv in + let scan width _ ib = scan_int_conv c width ib in + pad_prec_scanf ib rest readers pad prec scan (token_int64 c) + | Float (Float_F, pad, prec, rest) -> + pad_prec_scanf ib rest readers pad prec scan_caml_float token_float + | Float ((Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se + | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg + | Float_G | Float_pG | Float_sG), pad, prec, rest) -> + pad_prec_scanf ib rest readers pad prec scan_float token_float + + | Bool rest -> + let _ = scan_bool ib in + let b = token_bool ib in + Cons (b, make_scanf ib rest readers) + | Alpha _ -> + invalid_arg "scanf: bad conversion \"%a\"" + | Theta _ -> + invalid_arg "scanf: bad conversion \"%t\"" + | Reader fmt_rest -> + let Cons (reader, readers_rest) = readers in + let x = reader ib in + Cons (x, make_scanf ib fmt_rest readers_rest) + | Flush rest -> + if Scanning.end_of_input ib then make_scanf ib rest readers + else bad_input "end of input not found" + + | String_literal (str, rest) -> + String.iter (check_char ib) str; + make_scanf ib rest readers + | Char_literal (chr, rest) -> + check_char ib chr; + make_scanf ib rest readers + + | Format_arg (pad_opt, fmtty, rest) -> + let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in + let s = token_string ib in + let fmt = + try format_of_string_fmtty s fmtty + with Failure msg -> bad_input msg + in + Cons (fmt, make_scanf ib rest readers) + | Format_subst (pad_opt, rnu, fmtty, rest) -> + let fmtty' = convert_fmtty_on_reader_nb rnu fmtty in + let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in + let s = token_string ib in + let fmt, fmt' = + try + let Fmt_EBB fmt = fmt_ebb_of_string s in + type_format fmt fmtty, type_format fmt fmtty' + with Failure msg -> bad_input msg + in + Cons ((fmt', s), make_scanf ib (concat_fmt fmt rest) readers) + + | Scan_char_set (width_opt, char_set, Formatting (fmting, rest)) -> + let stp, str = stopper_of_formatting fmting in + let width = width_of_pad_opt width_opt in + let _ = scan_chars_in_char_set char_set (Some stp) width ib in + let s = token_string ib in + let str_rest = String_literal (str, rest) in + Cons (s, make_scanf ib str_rest readers) + | Scan_char_set (width_opt, char_set, rest) -> + let width = width_of_pad_opt width_opt in + let _ = scan_chars_in_char_set char_set None width ib in + let s = token_string ib in + Cons (s, make_scanf ib rest readers) + | Scan_get_counter (counter, rest) -> + let count = get_counter ib counter in + Cons (count, make_scanf ib rest readers) + + | Formatting (formatting, rest) -> + String.iter (check_char ib) (string_of_formatting formatting); + make_scanf ib rest readers + + | Ignored_param (ign, rest) -> + let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in + begin match make_scanf ib fmt' readers with + | Cons (_, arg_rest) -> arg_rest + | Nil -> assert false + end -let sscanf_format s fmt = bscanf_format (Scanning.from_string s) fmt;; + | End_of_format -> + Nil + +(* Case analysis on padding and precision. *) +(* Reject formats containing "%*" or "%.*". *) +(* Pass padding and precision to the generic scanner `scan'. *) +and pad_prec_scanf : type a c d e f x y z t . + Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt -> + (d, _) heter_list -> (x, y) padding -> (y, z -> a) precision -> + (int -> int -> Scanning.in_channel -> t) -> + (Scanning.in_channel -> z) -> + (x, f) heter_list = +fun ib fmt readers pad prec scan token -> match pad, prec with + | No_padding, No_precision -> + let _ = scan max_int max_int ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | No_padding, Lit_precision p -> + let _ = scan max_int p ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | Lit_padding ((Right | Zeros), w), No_precision -> + let _ = scan w max_int ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | Lit_padding ((Right | Zeros), w), Lit_precision p -> + let _ = scan w p ib in + let x = token ib in + Cons (x, make_scanf ib fmt readers) + | Lit_padding (Left, _), _ -> + invalid_arg "scanf: bad conversion \"%-\"" + | Lit_padding ((Right | Zeros), _), Arg_precision -> + invalid_arg "scanf: bad conversion \"%*\"" + | Arg_padding _, _ -> + invalid_arg "scanf: bad conversion \"%*\"" + | No_padding, Arg_precision -> + invalid_arg "scanf: bad conversion \"%*\"" + +(******************************************************************************) + (* Defining [scanf] and various flavors of [scanf] *) + +type 'a kscanf_result = Args of 'a | Exc of exn + +let kscanf ib ef (fmt, str) = + let rec apply : type a b . a -> (a, b) heter_list -> b = + fun f args -> match args with + | Cons (x, r) -> apply (f x) r + | Nil -> f + in + let k readers f = + Scanning.reset_token ib; + match try Args (make_scanf ib fmt readers) with + | (Scan_failure _ | Failure _ | End_of_file) as exc -> Exc exc + | Invalid_argument msg -> + invalid_arg (msg ^ " in format \"" ^ String.escaped str ^ "\"") + with + | Args args -> apply f args + | Exc exc -> ef ib exc + in + take_format_readers k fmt + +let kbscanf = kscanf + +(***) + +let ksscanf s ef fmt = kbscanf (Scanning.from_string s) ef fmt +let kfscanf ic ef fmt = kbscanf (Scanning.from_channel ic) ef fmt +let bscanf ib fmt = kscanf ib scanf_bad_input fmt +let fscanf ic fmt = kscanf (Scanning.from_channel ic) scanf_bad_input fmt +let sscanf s fmt = kscanf (Scanning.from_string s) scanf_bad_input fmt +let scanf fmt = kscanf Scanning.stdib scanf_bad_input fmt + +(***) + +let bscanf_format : Scanning.in_channel -> ('a, 'b, 'c, 'd, 'e, 'f) format6 -> + (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g = + fun ib format f -> + let _ = scan_caml_string max_int ib in + let str = token_string ib in + let fmt' = + try format_of_string_format str format + with Failure msg -> bad_input msg + in + f fmt' + +let sscanf_format : string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 -> + (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g = + fun s format f -> bscanf_format (Scanning.from_string s) format f let string_to_String s = let l = String.length s in diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index a1b3d1acb..297d6f2d5 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -485,6 +485,16 @@ val kscanf : exception that aborted the scanning process as arguments. *) +val ksscanf : + string -> (Scanning.in_channel -> exn -> 'd) -> + ('a, 'b, 'c, 'd) scanner +(** Same as {!Scanf.kscanf} but reads from the given string. *) + +val kfscanf : + Pervasives.in_channel -> (Scanning.in_channel -> exn -> 'd) -> + ('a, 'b, 'c, 'd) scanner +(** Same as {!Scanf.kscanf}, but reads from the given regular input channel. *) + (** {6 Reading format strings from input} *) val bscanf_format : |