summaryrefslogtreecommitdiffstats
path: root/stdlib/scanf.ml
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/scanf.ml')
-rw-r--r--stdlib/scanf.ml1078
1 files changed, 455 insertions, 623 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml
index 69837c470..f4e97a048 100644
--- a/stdlib/scanf.ml
+++ b/stdlib/scanf.ml
@@ -11,6 +11,19 @@
(* *)
(***********************************************************************)
+open CamlinternalFormatBasics
+open CamlinternalFormat
+
+(* alias to avoid warning for ambiguity between
+ Pervasives.format6
+ and CamlinternalFormatBasics.format6
+
+ (the former is in fact an alias for the latter,
+ but the ambiguity warning doesn't care)
+*)
+type ('a, 'b, 'c, 'd, 'e, 'f) format6 =
+ ('a, 'b, 'c, 'd, 'e, 'f) Pervasives.format6
+
(* The run-time library for scanners. *)
(* Scanning buffers. *)
@@ -402,11 +415,6 @@ end
type ('a, 'b, 'c, 'd) scanner =
('a, Scanning.in_channel, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c
-;;
-
-external string_to_format :
- string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity"
-;;
(* Reporting errors. *)
exception Scan_failure of string;;
@@ -429,33 +437,6 @@ let bad_end_of_input message =
(Printf.sprintf
"scanning of %s failed: \
premature end of file occurred before end of token" message)
-;;
-
-let int_of_width_opt = function
- | None -> max_int
- | Some width -> width
-;;
-
-let int_of_prec_opt = function
- | None -> max_int
- | Some prec -> prec
-;;
-
-module Sformat = Printf.CamlinternalPr.Sformat;;
-module Tformat = Printf.CamlinternalPr.Tformat;;
-
-let bad_conversion fmt i c =
- invalid_arg
- (Printf.sprintf
- "scanf: bad conversion %%%C, at char number %i \
- in format string \'%s\'" c i (Sformat.to_string fmt))
-;;
-
-let incomplete_format fmt =
- invalid_arg
- (Printf.sprintf "scanf: premature end of format string \'%s\'"
- (Sformat.to_string fmt))
-;;
let bad_float () =
bad_input "no dot or exponent part found in float token"
@@ -467,19 +448,15 @@ let character_mismatch_err c ci =
let character_mismatch c ci =
bad_input (character_mismatch_err c ci)
-;;
-
-let format_mismatch_err fmt1 fmt2 =
- Printf.sprintf
- "format read \'%s\' does not match specification \'%s\'" fmt1 fmt2
-;;
-
-let format_mismatch fmt1 fmt2 = bad_input (format_mismatch_err fmt1 fmt2);;
-(* Checking that 2 format strings are type compatible. *)
-let compatible_format_type fmt1 fmt2 =
- Tformat.summarize_format_type (string_to_format fmt1) =
- Tformat.summarize_format_type (string_to_format fmt2);;
+let rec skip_whites ib =
+ let c = Scanning.peek_char ib in
+ if not (Scanning.eof ib) then begin
+ match c with
+ | ' ' | '\t' | '\n' | '\r' ->
+ Scanning.invalidate_current_char ib; skip_whites ib
+ | _ -> ()
+ end
(* Checking that [c] is indeed in the input, then skips it.
In this case, the character [c] has been explicitly specified in the
@@ -496,28 +473,13 @@ let compatible_format_type fmt1 fmt2 =
We are also careful to treat "\r\n" in the input as an end of line marker:
it always matches a '\n' specification in the input format string. *)
let rec check_char ib c =
- let ci = Scanning.checked_peek_char ib in
- if ci = c then Scanning.invalidate_current_char ib else begin
- match ci with
- | '\r' when c = '\n' ->
- Scanning.invalidate_current_char ib; check_char ib '\n'
- | _ -> character_mismatch c ci
- end
-;;
-
-(* Checks that the current char is indeed one of the stopper characters,
- then skips it.
- Be careful that if ib has no more character this procedure should
- just do nothing (since %s@c defaults to the entire rest of the
- buffer, when no character c can be found in the input). *)
-let ignore_stoppers stps ib =
- if stps <> [] && not (Scanning.eof ib) then
- let ci = Scanning.peek_char ib in
- if List.memq ci stps then Scanning.invalidate_current_char ib else
- let sr = String.concat "" (List.map (String.make 1) stps) in
- bad_input
- (Printf.sprintf "looking for one of range %S, found %C" sr ci)
-;;
+ if c = ' ' then skip_whites ib else
+ let ci = Scanning.checked_peek_char ib in
+ if ci = c then Scanning.invalidate_current_char ib else
+ match ci with
+ | '\r' when c = '\n' ->
+ Scanning.invalidate_current_char ib; check_char ib '\n'
+ | _ -> character_mismatch c ci
(* Extracting tokens from the output token buffer. *)
@@ -701,7 +663,7 @@ let scan_optionally_signed_int width ib =
scan_unsigned_int width ib
;;
-let scan_int_conv conv width _prec ib =
+let scan_int_conv conv width ib =
match conv with
| 'b' -> scan_binary_int width ib
| 'd' -> scan_optionally_signed_decimal_int width ib
@@ -791,7 +753,7 @@ let scan_float width precision ib =
scan_exp_part width ib, precision
;;
-let scan_Float width precision ib =
+let scan_caml_float width precision ib =
let width = scan_optionally_signed_decimal_int width ib in
if width = 0 then bad_float () else
let c = Scanning.peek_char ib in
@@ -805,12 +767,11 @@ let scan_Float width precision ib =
| 'e' | 'E' ->
scan_exp_part width ib
| _ -> bad_float ()
-;;
(* Scan a regular string:
stops when encountering a space, if no scanning indication has been given;
- otherwise, stops when encountering one of the characters in the scanning
- indication list [stp].
+ otherwise, stops when encountering the characters in the scanning
+ indication [stp].
It also stops at end of file or when the maximum number of characters has
been read.*)
let scan_string stp width ib =
@@ -818,12 +779,14 @@ let scan_string stp width ib =
if width = 0 then width else
let c = Scanning.peek_char ib in
if Scanning.eof ib then width else
- if stp = [] then
- match c with
- | ' ' | '\t' | '\n' | '\r' -> width
- | c -> loop (Scanning.store_char width ib c) else
- if List.memq c stp then Scanning.skip_char width ib else
- loop (Scanning.store_char width ib c) in
+ match stp with
+ | Some c' when c = c' -> Scanning.skip_char width ib
+ | Some _ -> loop (Scanning.store_char width ib c)
+ | None ->
+ match c with
+ | ' ' | '\t' | '\n' | '\r' -> width
+ | _ -> loop (Scanning.store_char width ib c)
+ in
loop width
;;
@@ -925,7 +888,7 @@ let scan_backslash_char width ib =
;;
(* Scan a character (an OCaml token). *)
-let scan_Char width ib =
+let scan_caml_char width ib =
let rec find_start width =
match Scanning.checked_peek_char ib with
@@ -948,7 +911,7 @@ let scan_Char width ib =
;;
(* Scan a delimited string (an OCaml token). *)
-let scan_String width ib =
+let scan_caml_string width ib =
let rec find_start width =
match Scanning.checked_peek_char ib with
@@ -981,8 +944,7 @@ let scan_String width ib =
;;
(* Scan a boolean (an OCaml token). *)
-let scan_bool width ib =
- if width < 4 then bad_token_length "a boolean" else
+let scan_bool ib =
let c = Scanning.checked_peek_char ib in
let m =
match c with
@@ -991,560 +953,430 @@ let scan_bool width ib =
| c ->
bad_input
(Printf.sprintf "the character %C cannot start a boolean" c) in
- scan_string [] (min width m) ib
-;;
-
-(* Reading char sets in %[...] conversions. *)
-type char_set =
- | Pos_set of string (* Positive (regular) set. *)
- | Neg_set of string (* Negative (complementary) set. *)
-;;
-
-
-(* Char sets are read as sub-strings in the format string. *)
-let scan_range fmt j =
-
- let len = Sformat.length fmt in
-
- let buffer = Buffer.create len in
-
- let rec scan_closing j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | ']' -> j, Buffer.contents buffer
- | '%' ->
- let j = j + 1 in
- if j >= len then incomplete_format fmt else
- begin match Sformat.get fmt j with
- | '%' | '@' as c ->
- Buffer.add_char buffer c;
- scan_closing (j + 1)
- | c -> bad_conversion fmt j c
- end
- | c ->
- Buffer.add_char buffer c;
- scan_closing (j + 1) in
-
- let scan_first_pos j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | ']' as c ->
- Buffer.add_char buffer c;
- scan_closing (j + 1)
- | _ -> scan_closing j in
-
- let scan_first_neg j =
- if j >= len then incomplete_format fmt else
- match Sformat.get fmt j with
- | '^' ->
- let j = j + 1 in
- let k, char_set = scan_first_pos j in
- k, Neg_set char_set
- | _ ->
- let k, char_set = scan_first_pos j in
- k, Pos_set char_set in
-
- scan_first_neg j
-;;
-
-(* Char sets are now represented as bit vectors that are represented as
- byte strings. *)
-
-(* Bit manipulations into bytes. *)
-let set_bit_of_byte byte idx b =
- (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx)))
-;;
-
-let get_bit_of_byte byte idx = (byte lsr idx) land 1;;
-
-(* Bit manipulations in vectors of bytes represented as strings. *)
-let set_bit_of_range r c b =
- let idx = c land 0x7 in
- let ydx = c lsr 3 in
- let byte = Bytes.get r ydx in
- Bytes.set r ydx (char_of_int (set_bit_of_byte (int_of_char byte) idx b))
-;;
-
-let get_bit_of_range r c =
- let idx = c land 0x7 in
- let ydx = c lsr 3 in
- let byte = Bytes.get r ydx in
- get_bit_of_byte (int_of_char byte) idx
-;;
-
-(* Char sets represented as bit vectors represented as fixed length byte
- strings. *)
-(* Create a full or empty set of chars. *)
-let make_range bit =
- let c = char_of_int (if bit = 0 then 0 else 0xFF) in
- Bytes.make 32 c
-;;
-
-(* Test if a char belongs to a set of chars. *)
-let get_char_in_range r c = get_bit_of_range r (int_of_char c);;
-
-let bit_not b = (lnot b) land 1;;
-
-(* Build the bit vector corresponding to the set of characters
- that belongs to the string argument [set].
- (In the [Scanf] module [set] is always a sub-string of the format.) *)
-let make_char_bit_vect bit set =
- let r = make_range (bit_not bit) in
- let lim = String.length set - 1 in
- let rec loop bit rp i =
- if i <= lim then
- match set.[i] with
- | '-' when rp ->
- (* if i = 0 then rp is false (since the initial call is
- loop bit false 0). Hence i >= 1 and the following is safe. *)
- let c1 = set.[i - 1] in
- let i = succ i in
- if i > lim then loop bit false (i - 1) else
- let c2 = set.[i] in
- for j = int_of_char c1 to int_of_char c2 do
- set_bit_of_range r j bit done;
- loop bit false (succ i)
- | _ ->
- set_bit_of_range r (int_of_char set.[i]) bit;
- loop bit true (succ i) in
- loop bit false 0;
- r
-;;
-
-(* Compute the predicate on chars corresponding to a char set. *)
-let make_predicate bit set stp =
- let r = make_char_bit_vect bit set in
- List.iter
- (fun c -> set_bit_of_range r (int_of_char c) (bit_not bit)) stp;
- (fun c -> get_char_in_range r c)
-;;
+ scan_string None m ib
-let make_setp stp char_set =
- match char_set with
- | Pos_set set ->
- begin match String.length set with
- | 0 -> (fun _ -> 0)
- | 1 ->
- let p = set.[0] in
- (fun c -> if c == p then 1 else 0)
- | 2 ->
- let p1 = set.[0] and p2 = set.[1] in
- (fun c -> if c == p1 || c == p2 then 1 else 0)
- | 3 ->
- let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in
- if p2 = '-' then make_predicate 1 set stp else
- (fun c -> if c == p1 || c == p2 || c == p3 then 1 else 0)
- | _ -> make_predicate 1 set stp
- end
- | Neg_set set ->
- begin match String.length set with
- | 0 -> (fun _ -> 1)
- | 1 ->
- let p = set.[0] in
- (fun c -> if c != p then 1 else 0)
- | 2 ->
- let p1 = set.[0] and p2 = set.[1] in
- (fun c -> if c != p1 && c != p2 then 1 else 0)
- | 3 ->
- let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in
- if p2 = '-' then make_predicate 0 set stp else
- (fun c -> if c != p1 && c != p2 && c != p3 then 1 else 0)
- | _ -> make_predicate 0 set stp
- end
-;;
-
-let setp_table = Hashtbl.create 7;;
-
-let add_setp stp char_set setp =
- let char_set_tbl =
- try Hashtbl.find setp_table char_set with
- | Not_found ->
- let char_set_tbl = Hashtbl.create 3 in
- Hashtbl.add setp_table char_set char_set_tbl;
- char_set_tbl in
- Hashtbl.add char_set_tbl stp setp
-;;
-
-let find_setp stp char_set =
- try Hashtbl.find (Hashtbl.find setp_table char_set) stp with
- | Not_found ->
- let setp = make_setp stp char_set in
- add_setp stp char_set setp;
- setp
-;;
-
-let scan_chars_in_char_set stp char_set width ib =
- let rec loop_pos1 cp1 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c == cp1
- then loop_pos1 cp1 (Scanning.store_char width ib c)
- else width
- and loop_pos2 cp1 cp2 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c == cp1 || c == cp2
- then loop_pos2 cp1 cp2 (Scanning.store_char width ib c)
- else width
- and loop_pos3 cp1 cp2 cp3 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c == cp1 || c == cp2 || c == cp3
- then loop_pos3 cp1 cp2 cp3 (Scanning.store_char width ib c)
- else width
- and loop_neg1 cp1 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c != cp1
- then loop_neg1 cp1 (Scanning.store_char width ib c)
- else width
- and loop_neg2 cp1 cp2 width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c != cp1 && c != cp2
- then loop_neg2 cp1 cp2 (Scanning.store_char width ib c)
- else width
- and loop_neg3 cp1 cp2 cp3 width =
- if width = 0 then width else
+(* Scan a string containing elements in char_set and terminated by scan_indic
+ if provided. *)
+let scan_chars_in_char_set char_set scan_indic width ib =
+ let rec scan_chars i stp =
let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if c != cp1 && c != cp2 && c != cp3
- then loop_neg3 cp1 cp2 cp3 (Scanning.store_char width ib c)
- else width
- and loop setp width =
- if width = 0 then width else
- let c = Scanning.peek_char ib in
- if Scanning.eof ib then width else
- if setp c == 1
- then loop setp (Scanning.store_char width ib c)
- else width in
-
- let width =
- match char_set with
- | Pos_set set ->
- begin match String.length set with
- | 0 -> loop (fun _ -> 0) width
- | 1 -> loop_pos1 set.[0] width
- | 2 -> loop_pos2 set.[0] set.[1] width
- | 3 when set.[1] != '-' -> loop_pos3 set.[0] set.[1] set.[2] width
- | _ -> loop (find_setp stp char_set) width end
- | Neg_set set ->
- begin match String.length set with
- | 0 -> loop (fun _ -> 1) width
- | 1 -> loop_neg1 set.[0] width
- | 2 -> loop_neg2 set.[0] set.[1] width
- | 3 when set.[1] != '-' -> loop_neg3 set.[0] set.[1] set.[2] width
- | _ -> loop (find_setp stp char_set) width end in
- ignore_stoppers stp ib;
- width
-;;
-
-let get_count t ib =
- match t with
- | 'l' -> Scanning.line_count ib
- | 'n' -> Scanning.char_count ib
- | _ -> Scanning.token_count ib
-;;
-
-let rec skip_whites ib =
- let c = Scanning.peek_char ib in
- if not (Scanning.eof ib) then begin
- match c with
- | ' ' | '\t' | '\n' | '\r' ->
- Scanning.invalidate_current_char ib; skip_whites ib
- | _ -> ()
- end
-;;
+ if i > 0 && not (Scanning.eof ib) && is_in_char_set char_set c &&
+ int_of_char c <> stp then
+ let _ = Scanning.store_char max_int ib c in
+ scan_chars (i - 1) stp;
+ in
+ match scan_indic with
+ | None -> scan_chars width (-1);
+ | Some c ->
+ scan_chars width (int_of_char c);
+ if not (Scanning.eof ib) then
+ let ci = Scanning.peek_char ib in
+ if c = ci then Scanning.invalidate_current_char ib
+ else character_mismatch c ci
(* The global error report function for [Scanf]. *)
let scanf_bad_input ib = function
| Scan_failure s | Failure s ->
let i = Scanning.char_count ib in
- bad_input (Printf.sprintf "scanf: bad input at char number %i: \'%s\'" i s)
+ bad_input (Printf.sprintf "scanf: bad input at char number %i: %S" i s)
| x -> raise x
-;;
-
-let list_iter_i f l =
- let rec loop i = function
- | [] -> ()
- | [x] -> f i x (* Tail calling [f] *)
- | x :: xs -> f i x; loop (succ i) xs in
- loop 0 l
-;;
-
-let ascanf sc fmt =
- let ac = Tformat.ac_of_format fmt in
- match ac.Tformat.ac_rdrs with
- | 0 ->
- Obj.magic (fun f -> sc fmt [||] f)
- | 1 ->
- Obj.magic (fun x f -> sc fmt [| Obj.repr x |] f)
- | 2 ->
- Obj.magic (fun x y f -> sc fmt [| Obj.repr x; Obj.repr y; |] f)
- | 3 ->
- Obj.magic
- (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f)
- | nargs ->
- let rec loop i args =
- if i >= nargs then
- let a = Array.make nargs (Obj.repr 0) in
- list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args;
- Obj.magic (fun f -> sc fmt a f)
- else Obj.magic (fun x -> loop (succ i) (x :: args)) in
- loop 0 []
-;;
-
-(* The [scan_format] main scanning function.
- It takes as arguments:
- - an input buffer [ib] from which to read characters,
- - an error handling function [ef],
- - a format [fmt] that specifies what to read in the input,
- - a vector of user's defined readers [rv],
- - and a function [f] to pass the tokens read to.
-
- Then [scan_format] scans the format and the input buffer in parallel to
- find out tokens as specified by the format; when it finds one token, it
- converts it as specified, remembers the converted value as a future
- argument to the function [f], and continues scanning.
-
- If the entire scanning succeeds (i.e. the format string has been
- exhausted and the buffer has provided tokens according to the
- format string), [f] is applied to the tokens read.
-
- If the scanning or some conversion fails, the main scanning function
- aborts and applies the scanning buffer and a string that explains
- the error to the error handling function [ef] (the error continuation). *)
-
-let scan_format ib ef fmt rv f =
-
- let limr = Array.length rv - 1 in
-
- let return v = Obj.magic v () in
- let delay f x () = f x in
- let stack f = delay (return f) in
- let no_stack f _x = f in
-
- let rec scan fmt =
-
- let lim = Sformat.length fmt - 1 in
-
- let rec scan_fmt ir f i =
- if i > lim then ir, f else
- match Sformat.unsafe_get fmt i with
- | '%' -> scan_skip ir f (succ i)
- | ' ' -> skip_whites ib; scan_fmt ir f (succ i)
- | c -> check_char ib c; scan_fmt ir f (succ i)
-
- and scan_skip ir f i =
- if i > lim then ir, f else
- match Sformat.get fmt i with
- | '_' -> scan_limits true ir f (succ i)
- | _ -> scan_limits false ir f i
-
- and scan_limits skip ir f i =
-
- let rec scan_width i =
- if i > lim then incomplete_format fmt else
- match Sformat.get fmt i with
- | '0' .. '9' as conv ->
- let width, i =
- read_int_literal (decimal_value_of_char conv) (succ i) in
- Some width, i
- | _ -> None, i
-
- and scan_precision i =
- begin
- match Sformat.get fmt i with
- | '.' ->
- let precision, i = read_int_literal 0 (succ i) in
- (Some precision, i)
- | _ -> None, i
- end
- and read_int_literal accu i =
- if i > lim then accu, i else
- match Sformat.unsafe_get fmt i with
- | '0' .. '9' as c ->
- let accu = 10 * accu + decimal_value_of_char c in
- read_int_literal accu (succ i)
- | _ -> accu, i in
-
- if i > lim then ir, f else
- let width_opt, i = scan_width i in
- let prec_opt, i = scan_precision i in
- scan_conversion skip width_opt prec_opt ir f i
-
- and scan_conversion skip width_opt prec_opt ir f i =
- let stack = if skip then no_stack else stack in
- let width = int_of_width_opt width_opt in
- let prec = int_of_prec_opt prec_opt in
- match Sformat.get fmt i with
- | '%' | '@' as c ->
- check_char ib c;
- scan_fmt ir f (succ i)
- | '!' ->
- if not (Scanning.end_of_input ib)
- then bad_input "end of input not found" else
- scan_fmt ir f (succ i)
- | ',' ->
- scan_fmt ir f (succ i)
- | 's' ->
- let i, stp = scan_indication (succ i) in
- let _x = scan_string stp width ib in
- scan_fmt ir (stack f (token_string ib)) (succ i)
- | 'S' ->
- let _x = scan_String width ib in
- scan_fmt ir (stack f (token_string ib)) (succ i)
- | '[' (* ']' *) ->
- let i, char_set = scan_range fmt (succ i) in
- let i, stp = scan_indication (succ i) in
- let _x = scan_chars_in_char_set stp char_set width ib in
- scan_fmt ir (stack f (token_string ib)) (succ i)
- | ('c' | 'C') when width = 0 ->
- let c = Scanning.checked_peek_char ib in
- scan_fmt ir (stack f c) (succ i)
- | 'c' ->
- let _x = scan_char width ib in
- scan_fmt ir (stack f (token_char ib)) (succ i)
- | 'C' ->
- let _x = scan_Char width ib in
- scan_fmt ir (stack f (token_char ib)) (succ i)
- | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
- let _x = scan_int_conv conv width prec ib in
- scan_fmt ir (stack f (token_int conv ib)) (succ i)
- | 'N' as conv ->
- scan_fmt ir (stack f (get_count conv ib)) (succ i)
- | 'f' | 'e' | 'E' | 'g' | 'G' ->
- let _x = scan_float width prec ib in
- scan_fmt ir (stack f (token_float ib)) (succ i)
- | 'F' ->
- let _x = scan_Float width prec ib in
- scan_fmt ir (stack f (token_float ib)) (succ i)
-(* | 'B' | 'b' when width = Some 0 ->
- let _x = scan_bool width ib in
- scan_fmt ir (stack f (token_int ib)) (succ i) *)
- | 'B' | 'b' ->
- let _x = scan_bool width ib in
- scan_fmt ir (stack f (token_bool ib)) (succ i)
- | 'r' ->
- if ir > limr then assert false else
- let token = Obj.magic rv.(ir) ib in
- scan_fmt (succ ir) (stack f token) (succ i)
- | 'l' | 'n' | 'L' as conv0 ->
- let i = succ i in
- if i > lim then scan_fmt ir (stack f (get_count conv0 ib)) i else begin
- match Sformat.get fmt i with
- (* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *)
- | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv1 ->
- let _x = scan_int_conv conv1 width prec ib in
- (* Look back to the character that triggered the integer conversion
- (this character is either 'l', 'n' or 'L') to find the
- conversion to apply to the integer token read. *)
- begin match conv0 with
- | 'l' -> scan_fmt ir (stack f (token_int32 conv1 ib)) (succ i)
- | 'n' -> scan_fmt ir (stack f (token_nativeint conv1 ib)) (succ i)
- | _ -> scan_fmt ir (stack f (token_int64 conv1 ib)) (succ i) end
- (* This is not an integer conversion, but a regular %l, %n or %L. *)
- | _ -> scan_fmt ir (stack f (get_count conv0 ib)) i end
- | '(' | '{' as conv (* ')' '}' *) ->
- let i = succ i in
- (* Find [mf], the static specification for the format to read. *)
- let j =
- Tformat.sub_format
- incomplete_format bad_conversion conv fmt i in
- let mf = Sformat.sub fmt (Sformat.index_of_int i) (j - 2 - i) in
- (* Read [rf], the specified format string in the input buffer,
- and check its correctness w.r.t. [mf]. *)
- let _x = scan_String width ib in
- let rf = token_string ib in
- if not (compatible_format_type rf mf) then format_mismatch rf mf else
- (* Proceed according to the kind of metaformat found:
- - %{ mf %} simply returns [rf] as the token read,
- - %( mf %) returns [rf] as the first token read, then
- returns a second token obtained by scanning the input with
- format string [rf].
- Behaviour for %( mf %) is mandatory for sake of format string
- typechecking specification. To get pure format string
- substitution behaviour, you should use %_( mf %) that skips the
- first (format string) token and hence properly substitutes [mf] by
- [rf] in the format string argument.
- *)
- (* For conversion %{%}, just return this format string as the token
- read and go on with the rest of the format string argument. *)
- if conv = '{' (* '}' *) then scan_fmt ir (stack f rf) j else
- (* Or else, return this format string as the first token read;
- then continue scanning using this format string to get
- the following token read;
- finally go on with the rest of the format string argument. *)
- let ir, nf = scan (string_to_format rf) ir (stack f rf) 0 in
- (* Return the format string read and the value just read,
- then go on with the rest of the format. *)
- scan_fmt ir nf j
-
- | c -> bad_conversion fmt i c
-
- and scan_indication j =
- if j > lim then j - 1, [] else
- match Sformat.get fmt j with
- | '@' ->
- let k = j + 1 in
- if k > lim then j - 1, [] else
- begin match Sformat.get fmt k with
- | '%' ->
- let k = k + 1 in
- if k > lim then j - 1, [] else
- begin match Sformat.get fmt k with
- | '%' | '@' as c -> k, [ c ]
- | _c -> j - 1, []
- end
- | c -> k, [ c ]
- end
- | _c -> j - 1, [] in
-
- scan_fmt in
-
-
- Scanning.reset_token ib;
-
- let v =
- try snd (scan fmt 0 (fun () -> f) 0) with
- | (Scan_failure _ | Failure _ | End_of_file) as exc ->
- stack (delay ef ib) exc in
- return v
-;;
-
-let mkscanf ib ef fmt =
- let sc = scan_format ib ef in
- ascanf sc fmt
-;;
+(* Get the content of a counter from an input buffer. *)
+let get_counter ib counter = match counter with
+ | Line_counter -> Scanning.line_count ib
+ | Char_counter -> Scanning.char_count ib
+ | Token_counter -> Scanning.token_count ib
-let kscanf ib ef fmt = mkscanf ib ef fmt;;
-
-let bscanf ib = kscanf ib scanf_bad_input;;
-
-let fscanf ic = bscanf (Scanning.from_channel ic);;
-
-let sscanf : string -> ('a, 'b, 'c, 'd) scanner
- = fun s -> bscanf (Scanning.from_string s);;
-
-let scanf fmt = bscanf Scanning.stdib fmt;;
+(* Compute the width of a padding option (see "%42{" and "%123("). *)
+let width_of_pad_opt pad_opt = match pad_opt with
+ | None -> max_int
+ | Some width -> width
-let bscanf_format ib fmt f =
- let fmt = Sformat.unsafe_to_string fmt in
- let fmt1 =
- ignore (scan_String max_int ib);
- token_string ib in
- if not (compatible_format_type fmt1 fmt) then
- format_mismatch fmt1 fmt else
- f (string_to_format fmt1)
-;;
+let stopper_of_formatting fmting =
+ if fmting = Escaped_percent then '%', "" else
+ let str = string_of_formatting fmting in
+ let stp = str.[1] in
+ let sub_str = String.sub str 2 (String.length str - 2) in
+ stp, sub_str
+
+(******************************************************************************)
+ (* Readers managment *)
+
+(* A call to take_format_readers on a format is evaluated into functions
+ taking readers as arguments and aggregate them into an heterogeneous list *)
+(* When all readers are taken, finally pass the list of the readers to the
+ continuation k. *)
+let rec take_format_readers : type a c d e f .
+ ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, e, f) fmt ->
+ d =
+fun k fmt -> match fmt with
+ | Reader fmt_rest ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_format_readers new_k fmt_rest
+ | Char rest -> take_format_readers k rest
+ | Caml_char rest -> take_format_readers k rest
+ | String (_, rest) -> take_format_readers k rest
+ | Caml_string (_, rest) -> take_format_readers k rest
+ | Int (_, _, _, rest) -> take_format_readers k rest
+ | Int32 (_, _, _, rest) -> take_format_readers k rest
+ | Nativeint (_, _, _, rest) -> take_format_readers k rest
+ | Int64 (_, _, _, rest) -> take_format_readers k rest
+ | Float (_, _, _, rest) -> take_format_readers k rest
+ | Bool rest -> take_format_readers k rest
+ | Alpha rest -> take_format_readers k rest
+ | Theta rest -> take_format_readers k rest
+ | Flush rest -> take_format_readers k rest
+ | String_literal (_, rest) -> take_format_readers k rest
+ | Char_literal (_, rest) -> take_format_readers k rest
+
+ | Scan_char_set (_, _, rest) -> take_format_readers k rest
+ | Scan_get_counter (_, rest) -> take_format_readers k rest
+
+ | Formatting (_, rest) -> take_format_readers k rest
+
+ | Format_arg (_, _, rest) -> take_format_readers k rest
+ | Format_subst (_, _, fmtty, rest) -> take_fmtty_format_readers k fmtty rest
+ | Ignored_param (ign, rest) -> take_ignored_format_readers k ign rest
+
+ | End_of_format -> k Nil
+
+(* Take readers associated to an fmtty coming from a Format_subst "%(...%)". *)
+and take_fmtty_format_readers : type x y a c d e f .
+ ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) fmtty ->
+ (y, Scanning.in_channel, c, x, e, f) fmt -> d =
+fun k fmtty fmt -> match fmtty with
+ | Reader_ty fmt_rest ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_fmtty_format_readers new_k fmt_rest fmt
+ | Ignored_reader_ty fmt_rest ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_fmtty_format_readers new_k fmt_rest fmt
+ | Char_ty rest -> take_fmtty_format_readers k rest fmt
+ | String_ty rest -> take_fmtty_format_readers k rest fmt
+ | Int_ty rest -> take_fmtty_format_readers k rest fmt
+ | Int32_ty rest -> take_fmtty_format_readers k rest fmt
+ | Nativeint_ty rest -> take_fmtty_format_readers k rest fmt
+ | Int64_ty rest -> take_fmtty_format_readers k rest fmt
+ | Float_ty rest -> take_fmtty_format_readers k rest fmt
+ | Bool_ty rest -> take_fmtty_format_readers k rest fmt
+ | Alpha_ty rest -> take_fmtty_format_readers k rest fmt
+ | Theta_ty rest -> take_fmtty_format_readers k rest fmt
+ | Format_arg_ty (_, rest) -> take_fmtty_format_readers k rest fmt
+ | End_of_fmtty -> take_format_readers k fmt
+ | Format_subst_ty (_, ty, rest) ->
+ take_fmtty_format_readers k (concat_fmtty ty rest) fmt
+
+(* Take readers associated to an ignored parameter. *)
+and take_ignored_format_readers : type x y a c d e f .
+ ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) ignored ->
+ (y, Scanning.in_channel, c, x, e, f) fmt -> d =
+fun k ign fmt -> match ign with
+ | Ignored_reader ->
+ fun reader ->
+ let new_k readers_rest = k (Cons (reader, readers_rest)) in
+ take_format_readers new_k fmt
+ | Ignored_char -> take_format_readers k fmt
+ | Ignored_caml_char -> take_format_readers k fmt
+ | Ignored_string _ -> take_format_readers k fmt
+ | Ignored_caml_string _ -> take_format_readers k fmt
+ | Ignored_int (_, _) -> take_format_readers k fmt
+ | Ignored_int32 (_, _) -> take_format_readers k fmt
+ | Ignored_nativeint (_, _) -> take_format_readers k fmt
+ | Ignored_int64 (_, _) -> take_format_readers k fmt
+ | Ignored_float (_, _) -> take_format_readers k fmt
+ | Ignored_bool -> take_format_readers k fmt
+ | Ignored_format_arg _ -> take_format_readers k fmt
+ | Ignored_format_subst (_, fmtty) -> take_fmtty_format_readers k fmtty fmt
+ | Ignored_scan_char_set _ -> take_format_readers k fmt
+
+(******************************************************************************)
+ (* Scanf "%(...%)" tools *)
+
+(* Type used to cross and substract reader_nb_unifer. *)
+(* Used to interface make_format_subst_rnus and convert_fmtty_on_reader_nb. *)
+type (_, _, _, _, _, _, _) format_subst_rnus = Format_subst_rnus :
+ ('d3, 'q3, 'd2, 'q2) reader_nb_unifier *
+ ('d1, 'q1, 'd3, 'q3) reader_nb_unifier *
+ ('q1, 'e1, 'q3, 'e3) reader_nb_unifier ->
+ ('d1, 'q1, 'e1, 'd2, 'q2, 'd3, 'e3) format_subst_rnus
+
+(* Cross and substract reader_nb_unifers. *)
+(* Used when formats contains encapsulated "%(...%)" like "%(..%(..%)..%)". *)
+(* See (convert_fmtty_on_reader_nb _ "%(...%)"). *)
+let rec make_format_subst_rnus : type d1 q1 e1 d2 q2 d3 e3 .
+ (d1, e1, d3, e3) reader_nb_unifier -> (d1, q1, d2, q2) reader_nb_unifier ->
+ (d1, q1, e1, d2, q2, d3, e3) format_subst_rnus =
+fun rnu sub_rnu -> match rnu, sub_rnu with
+ | Succ_reader rnu_rest, Succ_reader sub_rnu_rest ->
+ let Format_subst_rnus (sub_rnu', sub_fmtty_rnu, rest_rnu) =
+ make_format_subst_rnus rnu_rest sub_rnu_rest in
+ Format_subst_rnus(Succ_reader sub_rnu', Succ_reader sub_fmtty_rnu, rest_rnu)
+ | _, Zero_reader ->
+ Format_subst_rnus (Zero_reader, Zero_reader, rnu)
+ | Zero_reader, Succ_reader _ ->
+ (* Impossible! By hypothesis: rnu > sub_rnu. *)
+ assert false
+
+(* Use a reader_nb_unifier to transform 'd and 'e parameters of an fmtty. *)
+(* See make_scanf "%(...%)". *)
+let rec convert_fmtty_on_reader_nb : type a b c d1 d2 e1 e2 f .
+ (d1, e1, d2, e2) reader_nb_unifier -> (a, b, c, d1, e1, f) fmtty ->
+ (a, b, c, d2, e2, f) fmtty =
+fun rnu fmtty -> match rnu, fmtty with
+ | _, Char_ty rest -> Char_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, String_ty rest -> String_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Int_ty rest -> Int_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Int32_ty rest -> Int32_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Nativeint_ty rest -> Nativeint_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Int64_ty rest -> Int64_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Float_ty rest -> Float_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Bool_ty rest -> Bool_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Alpha_ty rest -> Alpha_ty (convert_fmtty_on_reader_nb rnu rest)
+ | _, Theta_ty rest -> Theta_ty (convert_fmtty_on_reader_nb rnu rest)
+
+ | Succ_reader rnu_rest, Reader_ty fmtty_rest ->
+ Reader_ty (convert_fmtty_on_reader_nb rnu_rest fmtty_rest)
+ | Succ_reader rnu_rest, Ignored_reader_ty fmtty_rest ->
+ Ignored_reader_ty (convert_fmtty_on_reader_nb rnu_rest fmtty_rest)
+
+ | _, Format_arg_ty (sub_fmtty, rest) ->
+ Format_arg_ty (sub_fmtty, convert_fmtty_on_reader_nb rnu rest)
+ | _, Format_subst_ty (sub_rnu, sub_fmtty, rest) ->
+ let Format_subst_rnus (sub_rnu', sub_fmtty_rnu, rest_rnu) =
+ make_format_subst_rnus rnu sub_rnu in
+ let sub_fmtty' = convert_fmtty_on_reader_nb sub_fmtty_rnu sub_fmtty in
+ let rest' = convert_fmtty_on_reader_nb rest_rnu rest in
+ Format_subst_ty (sub_rnu', sub_fmtty', rest')
+
+ | Zero_reader, End_of_fmtty -> End_of_fmtty
+
+ | Zero_reader, Reader_ty _ ->
+ (* Impossible, by typing constraints on fmtty and rnu constructors: *)
+ (* rnu = Zero_reader => d1 == e1 *)
+ (* fmtty = Reader_ty _ => d1 <> e1 *)
+ assert false
+ | Zero_reader, Ignored_reader_ty _ ->
+ assert false (* Similar. *)
+ | Succ_reader _, End_of_fmtty ->
+ assert false (* Similar. *)
+
+(******************************************************************************)
+ (* Generic scanning *)
+
+(* Make a generic scanning function. *)
+(* Scan a stream according to a format and readers obtained by
+ take_format_readers, and aggegate scanned values into an
+ heterogeneous list. *)
+(* Return the heterogeneous list of scanned values. *)
+let rec make_scanf : type a c d e f .
+ Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt ->
+ (d, _) heter_list -> (a, f) heter_list =
+fun ib fmt readers -> match fmt with
+ | Char rest ->
+ let _ = scan_char 0 ib in
+ let c = token_char ib in
+ Cons (c, make_scanf ib rest readers)
+ | Caml_char rest ->
+ let _ = scan_caml_char 0 ib in
+ let c = token_char ib in
+ Cons (c, make_scanf ib rest readers)
+
+ | String (pad, Formatting (fmting, rest)) ->
+ let stp, str = stopper_of_formatting fmting in
+ let scan width _ ib = scan_string (Some stp) width ib in
+ let str_rest = String_literal (str, rest) in
+ pad_prec_scanf ib str_rest readers pad No_precision scan token_string
+ | String (pad, rest) ->
+ let scan width _ ib = scan_string None width ib in
+ pad_prec_scanf ib rest readers pad No_precision scan token_string
+
+ | Caml_string (pad, rest) ->
+ let scan width _ ib = scan_caml_string width ib in
+ pad_prec_scanf ib rest readers pad No_precision scan token_string
+ | Int (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_int c)
+ | Int32 (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_int32 c)
+ | Nativeint (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_nativeint c)
+ | Int64 (iconv, pad, prec, rest) ->
+ let c = char_of_iconv iconv in
+ let scan width _ ib = scan_int_conv c width ib in
+ pad_prec_scanf ib rest readers pad prec scan (token_int64 c)
+ | Float (Float_F, pad, prec, rest) ->
+ pad_prec_scanf ib rest readers pad prec scan_caml_float token_float
+ | Float ((Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se
+ | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg
+ | Float_G | Float_pG | Float_sG), pad, prec, rest) ->
+ pad_prec_scanf ib rest readers pad prec scan_float token_float
+
+ | Bool rest ->
+ let _ = scan_bool ib in
+ let b = token_bool ib in
+ Cons (b, make_scanf ib rest readers)
+ | Alpha _ ->
+ invalid_arg "scanf: bad conversion \"%a\""
+ | Theta _ ->
+ invalid_arg "scanf: bad conversion \"%t\""
+ | Reader fmt_rest ->
+ let Cons (reader, readers_rest) = readers in
+ let x = reader ib in
+ Cons (x, make_scanf ib fmt_rest readers_rest)
+ | Flush rest ->
+ if Scanning.end_of_input ib then make_scanf ib rest readers
+ else bad_input "end of input not found"
+
+ | String_literal (str, rest) ->
+ String.iter (check_char ib) str;
+ make_scanf ib rest readers
+ | Char_literal (chr, rest) ->
+ check_char ib chr;
+ make_scanf ib rest readers
+
+ | Format_arg (pad_opt, fmtty, rest) ->
+ let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in
+ let s = token_string ib in
+ let fmt =
+ try format_of_string_fmtty s fmtty
+ with Failure msg -> bad_input msg
+ in
+ Cons (fmt, make_scanf ib rest readers)
+ | Format_subst (pad_opt, rnu, fmtty, rest) ->
+ let fmtty' = convert_fmtty_on_reader_nb rnu fmtty in
+ let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in
+ let s = token_string ib in
+ let fmt, fmt' =
+ try
+ let Fmt_EBB fmt = fmt_ebb_of_string s in
+ type_format fmt fmtty, type_format fmt fmtty'
+ with Failure msg -> bad_input msg
+ in
+ Cons ((fmt', s), make_scanf ib (concat_fmt fmt rest) readers)
+
+ | Scan_char_set (width_opt, char_set, Formatting (fmting, rest)) ->
+ let stp, str = stopper_of_formatting fmting in
+ let width = width_of_pad_opt width_opt in
+ let _ = scan_chars_in_char_set char_set (Some stp) width ib in
+ let s = token_string ib in
+ let str_rest = String_literal (str, rest) in
+ Cons (s, make_scanf ib str_rest readers)
+ | Scan_char_set (width_opt, char_set, rest) ->
+ let width = width_of_pad_opt width_opt in
+ let _ = scan_chars_in_char_set char_set None width ib in
+ let s = token_string ib in
+ Cons (s, make_scanf ib rest readers)
+ | Scan_get_counter (counter, rest) ->
+ let count = get_counter ib counter in
+ Cons (count, make_scanf ib rest readers)
+
+ | Formatting (formatting, rest) ->
+ String.iter (check_char ib) (string_of_formatting formatting);
+ make_scanf ib rest readers
+
+ | Ignored_param (ign, rest) ->
+ let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in
+ begin match make_scanf ib fmt' readers with
+ | Cons (_, arg_rest) -> arg_rest
+ | Nil -> assert false
+ end
-let sscanf_format s fmt = bscanf_format (Scanning.from_string s) fmt;;
+ | End_of_format ->
+ Nil
+
+(* Case analysis on padding and precision. *)
+(* Reject formats containing "%*" or "%.*". *)
+(* Pass padding and precision to the generic scanner `scan'. *)
+and pad_prec_scanf : type a c d e f x y z t .
+ Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt ->
+ (d, _) heter_list -> (x, y) padding -> (y, z -> a) precision ->
+ (int -> int -> Scanning.in_channel -> t) ->
+ (Scanning.in_channel -> z) ->
+ (x, f) heter_list =
+fun ib fmt readers pad prec scan token -> match pad, prec with
+ | No_padding, No_precision ->
+ let _ = scan max_int max_int ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | No_padding, Lit_precision p ->
+ let _ = scan max_int p ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | Lit_padding ((Right | Zeros), w), No_precision ->
+ let _ = scan w max_int ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | Lit_padding ((Right | Zeros), w), Lit_precision p ->
+ let _ = scan w p ib in
+ let x = token ib in
+ Cons (x, make_scanf ib fmt readers)
+ | Lit_padding (Left, _), _ ->
+ invalid_arg "scanf: bad conversion \"%-\""
+ | Lit_padding ((Right | Zeros), _), Arg_precision ->
+ invalid_arg "scanf: bad conversion \"%*\""
+ | Arg_padding _, _ ->
+ invalid_arg "scanf: bad conversion \"%*\""
+ | No_padding, Arg_precision ->
+ invalid_arg "scanf: bad conversion \"%*\""
+
+(******************************************************************************)
+ (* Defining [scanf] and various flavors of [scanf] *)
+
+type 'a kscanf_result = Args of 'a | Exc of exn
+
+let kscanf ib ef (fmt, str) =
+ let rec apply : type a b . a -> (a, b) heter_list -> b =
+ fun f args -> match args with
+ | Cons (x, r) -> apply (f x) r
+ | Nil -> f
+ in
+ let k readers f =
+ Scanning.reset_token ib;
+ match try Args (make_scanf ib fmt readers) with
+ | (Scan_failure _ | Failure _ | End_of_file) as exc -> Exc exc
+ | Invalid_argument msg ->
+ invalid_arg (msg ^ " in format \"" ^ String.escaped str ^ "\"")
+ with
+ | Args args -> apply f args
+ | Exc exc -> ef ib exc
+ in
+ take_format_readers k fmt
+
+let kbscanf = kscanf
+
+(***)
+
+let ksscanf s ef fmt = kbscanf (Scanning.from_string s) ef fmt
+let kfscanf ic ef fmt = kbscanf (Scanning.from_channel ic) ef fmt
+let bscanf ib fmt = kscanf ib scanf_bad_input fmt
+let fscanf ic fmt = kscanf (Scanning.from_channel ic) scanf_bad_input fmt
+let sscanf s fmt = kscanf (Scanning.from_string s) scanf_bad_input fmt
+let scanf fmt = kscanf Scanning.stdib scanf_bad_input fmt
+
+(***)
+
+let bscanf_format : Scanning.in_channel -> ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
+ (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g =
+ fun ib format f ->
+ let _ = scan_caml_string max_int ib in
+ let str = token_string ib in
+ let fmt' =
+ try format_of_string_format str format
+ with Failure msg -> bad_input msg
+ in
+ f fmt'
+
+let sscanf_format : string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
+ (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g =
+ fun s format f -> bscanf_format (Scanning.from_string s) format f
let string_to_String s =
let l = String.length s in