diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/scanf.ml | 230 |
1 files changed, 153 insertions, 77 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 72a013a14..4d75c149a 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -104,7 +104,8 @@ val from_file : string -> scanbuf;; val from_file_bin : string -> scanbuf;; val from_function : (unit -> char) -> scanbuf;; -end;; +end +;; module Scanning : SCANNING = struct @@ -121,7 +122,8 @@ type scanbuf = { mutable get_next_char : unit -> char; tokbuf : Buffer.t; file_name : file_name; -};; +} +;; let null_char = '\000';; @@ -141,7 +143,8 @@ let next_char ib = ib.current_char <- c; ib.current_char_is_valid <- false; ib.eof <- true; - c;; + c +;; let peek_char ib = if ib.current_char_is_valid then ib.current_char else next_char ib;; @@ -154,17 +157,21 @@ let peek_char ib = let checked_peek_char ib = let c = peek_char ib in if ib.eof then raise End_of_file; - c;; + c +;; let end_of_input ib = ignore (peek_char ib); - ib.eof;; + ib.eof +;; let eof ib = ib.eof;; let beginning_of_input ib = ib.char_count = 0;; let name_of_input ib = ib.file_name;; -let char_count ib = ib.char_count;; +let char_count ib = + if ib.current_char_is_valid then ib.char_count - 1 else ib.char_count +;; let line_count ib = ib.line_count;; let reset_token ib = Buffer.reset ib.tokbuf;; let invalidate_current_char ib = ib.current_char_is_valid <- false;; @@ -174,19 +181,22 @@ let token ib = let tok = Buffer.contents tokbuf in Buffer.clear tokbuf; ib.token_count <- succ ib.token_count; - tok;; + tok +;; let token_count ib = ib.token_count;; let skip_char ib max = invalidate_current_char ib; - max;; + max +;; let ignore_char ib max = skip_char ib (max - 1);; let store_char ib c max = Buffer.add_char ib.tokbuf c; - ignore_char ib max;; + ignore_char ib max +;; let default_token_buffer_size = 1024;; @@ -200,7 +210,8 @@ let create fname next = { get_next_char = next; tokbuf = Buffer.create default_token_buffer_size; file_name = fname; -};; +} +;; let from_string s = let i = ref 0 in @@ -210,7 +221,8 @@ let from_string s = let c = s.[!i] in incr i; c in - create "string input" next;; + create "string input" next +;; let from_function = create "function input";; @@ -276,7 +288,8 @@ let from_ic scan_close_ic fname ic = buf.[0] end end in - create fname next;; + create fname next +;; let from_ic_close_at_end = from_ic scan_close_at_end;; @@ -302,15 +315,18 @@ let from_channel = from_ic scan_raise_at_end "input channel";; characters have been read, we simply ask to read more. *) let stdib = from_ic scan_raise_at_end "stdin" stdin;; -end;; +end +;; (* Formatted input functions. *) type ('a, 'b, 'c, 'd) scanner = - ('a, Scanning.scanbuf, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c;; + ('a, Scanning.scanbuf, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c +;; external string_to_format : - string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity";; + string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" +;; (* Reporting errors. *) exception Scan_failure of string;; @@ -319,7 +335,8 @@ let bad_input s = raise (Scan_failure s);; let bad_input_char c = bad_input (String.make 1 c);; let bad_input_escape c = - bad_input (Printf.sprintf "illegal escape character %C" c);; + bad_input (Printf.sprintf "illegal escape character %C" c) +;; module Sformat = Printf.CamlinternalPr.Sformat;; module Tformat = Printf.CamlinternalPr.Tformat;; @@ -328,28 +345,35 @@ let bad_conversion fmt i c = invalid_arg (Printf.sprintf "scanf: bad conversion %%%c, at char number %i \ - in format string ``%s''" c i (Sformat.to_string fmt));; + in format string ``%s''" c i (Sformat.to_string fmt)) +;; let incomplete_format fmt = invalid_arg (Printf.sprintf "scanf: premature end of format string ``%s''" - (Sformat.to_string fmt));; + (Sformat.to_string fmt)) +;; -let bad_float () = bad_input "no dot or exponent part found in float token";; +let bad_float () = bad_input "no dot or exponent part found in +float token" +;; let character_mismatch_err c ci = - Printf.sprintf "looking for %C, found %C" c ci;; + Printf.sprintf "looking for %C, found %C" c ci +;; let character_mismatch c ci = - bad_input (character_mismatch_err c ci);; + bad_input (character_mismatch_err c ci) +;; let format_mismatch_err fmt1 fmt2 = Printf.sprintf - "format read ``%s'' does not match specification ``%s''" fmt1 fmt2;; + "format read ``%s'' does not match specification ``%s''" fmt1 fmt2 +;; let format_mismatch fmt1 fmt2 = bad_input (format_mismatch_err fmt1 fmt2);; -(* Checking that 2 format string are type compatible. *) +(* Checking that 2 format strings are type compatible. *) let compatible_format_type fmt1 fmt2 = Tformat.summarize_format_type (string_to_format fmt1) = Tformat.summarize_format_type (string_to_format fmt2);; @@ -362,7 +386,8 @@ let compatible_format_type fmt1 fmt2 = let check_char ib c = let ci = Scanning.checked_peek_char ib in if ci = c then Scanning.invalidate_current_char ib else - character_mismatch c ci;; + character_mismatch c ci +;; (* Checks that the current char is indeed one of the stopper characters, then skips it. @@ -375,7 +400,8 @@ let ignore_stoppers stps ib = if List.memq ci stps then Scanning.invalidate_current_char ib else let sr = String.concat "" (List.map (String.make 1) stps) in bad_input - (Printf.sprintf "looking for one of range %S, found %C" sr ci);; + (Printf.sprintf "looking for one of range %S, found %C" sr ci) +;; (* Extracting tokens from ouput token buffer. *) @@ -401,7 +427,8 @@ let token_int_literal conv ib = | 'b' -> "0b" ^ Scanning.token ib | _ -> assert false in let l = String.length tok in - if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1);; + if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1) +;; (* All the functions that convert a string to a number raise the exception Failure when the conversion is not possible. @@ -417,11 +444,14 @@ let token_float ib = float_of_string (Scanning.token ib);; However, we can bind and use the corresponding primitives that are available in the runtime. *) external nativeint_of_string : string -> nativeint - = "caml_nativeint_of_string";; + = "caml_nativeint_of_string" +;; external int32_of_string : string -> int32 - = "caml_int32_of_string";; + = "caml_int32_of_string" +;; external int64_of_string : string -> int64 - = "caml_int64_of_string";; + = "caml_int64_of_string" +;; let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib);; let token_int32 conv ib = int32_of_string (token_int_literal conv ib);; @@ -450,7 +480,8 @@ let rec scan_decimal_digits max ib = | '_' -> let max = Scanning.ignore_char ib max in scan_decimal_digits max ib - | _ -> max;; + | _ -> max +;; let scan_decimal_digits_plus max ib = let c = Scanning.checked_peek_char ib in @@ -458,7 +489,8 @@ let scan_decimal_digits_plus max ib = | '0' .. '9' -> let max = Scanning.store_char ib c max in scan_decimal_digits max ib - | c -> bad_input_char c;; + | c -> bad_input_char c +;; let scan_digits_plus digitp max ib = (* To scan numbers from other bases, we use a predicate argument to @@ -480,23 +512,27 @@ let scan_digits_plus digitp max ib = if digitp c then let max = Scanning.store_char ib c max in scan_digits max - else bad_input_char c;; + else bad_input_char c +;; let is_binary_digit = function | '0' .. '1' -> true - | _ -> false;; + | _ -> false +;; let scan_binary_int = scan_digits_plus is_binary_digit;; let is_octal_digit = function | '0' .. '7' -> true - | _ -> false;; + | _ -> false +;; let scan_octal_int = scan_digits_plus is_octal_digit;; let is_hexa_digit = function | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true - | _ -> false;; + | _ -> false +;; let scan_hexadecimal_int = scan_digits_plus is_hexa_digit;; @@ -508,11 +544,13 @@ let scan_sign max ib = match c with | '+' -> Scanning.store_char ib c max | '-' -> Scanning.store_char ib c max - | c -> max;; + | c -> max +;; let scan_optionally_signed_decimal_int max ib = let max = scan_sign max ib in - scan_unsigned_decimal_int max ib;; + scan_unsigned_decimal_int max ib +;; (* Scan an unsigned integer that could be given in any (common) basis. If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is @@ -530,11 +568,13 @@ let scan_unsigned_int max ib = | 'o' -> scan_octal_int (Scanning.store_char ib c max) ib | 'b' -> scan_binary_int (Scanning.store_char ib c max) ib | c -> scan_decimal_digits max ib end - | c -> scan_unsigned_decimal_int max ib;; + | c -> scan_unsigned_decimal_int max ib +;; let scan_optionally_signed_int max ib = let max = scan_sign max ib in - scan_unsigned_int max ib;; + scan_unsigned_int max ib +;; let scan_int_conv conv max ib = match conv with @@ -544,7 +584,8 @@ let scan_int_conv conv max ib = | 'o' -> scan_octal_int max ib | 'u' -> scan_unsigned_decimal_int max ib | 'x' | 'X' -> scan_hexadecimal_int max ib - | c -> assert false;; + | c -> assert false +;; (* Scanning floating point numbers. *) (* Fractional part is optional and can be reduced to 0 digits. *) @@ -555,7 +596,8 @@ let scan_frac_part max ib = match c with | '0' .. '9' as c -> scan_decimal_digits (Scanning.store_char ib c max) ib - | _ -> max;; + | _ -> max +;; (* Exp part is optional and can be reduced to 0 digits. *) let scan_exp_part max ib = @@ -565,7 +607,8 @@ let scan_exp_part max ib = match c with | 'e' | 'E' as c -> scan_optionally_signed_decimal_int (Scanning.store_char ib c max) ib - | _ -> max;; + | _ -> max +;; (* Scan the integer part of a floating point number, (not using the Caml lexical convention since the integer part can be empty): @@ -573,7 +616,8 @@ let scan_exp_part max ib = digits (e.g. -.1). *) let scan_int_part max ib = let max = scan_sign max ib in - scan_decimal_digits max ib;; + scan_decimal_digits max ib +;; let scan_float max ib = let max = scan_int_part max ib in @@ -585,7 +629,8 @@ let scan_float max ib = let max = Scanning.store_char ib c max in let max = scan_frac_part max ib in scan_exp_part max ib - | c -> scan_exp_part max ib;; + | c -> scan_exp_part max ib +;; let scan_Float max ib = let max = scan_optionally_signed_decimal_int max ib in @@ -599,7 +644,8 @@ let scan_Float max ib = scan_exp_part max ib | 'e' | 'E' -> scan_exp_part max ib - | c -> bad_float ();; + | c -> bad_float () +;; (* Scan a regular string: stops when encountering a space or one of the characters in stp. It also stops when the maximum number of @@ -615,18 +661,21 @@ let scan_string stp max ib = | c -> loop (Scanning.store_char ib c max) else if List.memq c stp then Scanning.skip_char ib max else loop (Scanning.store_char ib c max) in - loop max;; + loop max +;; (* Scan a char: peek strictly one character in the input, whatsoever. *) let scan_char max ib = - Scanning.store_char ib (Scanning.checked_peek_char ib) max;; + Scanning.store_char ib (Scanning.checked_peek_char ib) max +;; let char_for_backslash = function | 'n' -> '\010' | 'r' -> '\013' | 'b' -> '\008' | 't' -> '\009' - | c -> c;; + | c -> c +;; (* The integer value corresponding to the facial value of a valid decimal digit character. *) @@ -639,7 +688,8 @@ let char_for_decimal_code c0 c1 c2 = int_value_of_char c2 in if c < 0 || c > 255 then bad_input (Printf.sprintf "bad char \\%c%c%c" c0 c1 c2) - else char_of_int c;; + else char_of_int c +;; (* Called when encountering '\\' as starter of a char. Stops before the corresponding '\''. *) @@ -660,7 +710,8 @@ let scan_backslash_char max ib = let c1 = get_digit () in let c2 = get_digit () in Scanning.store_char ib (char_for_decimal_code c0 c1 c2) (max - 2) - | c -> bad_input_char c;; + | c -> bad_input_char c +;; let scan_Char max ib = let rec loop s max = @@ -681,7 +732,8 @@ let scan_Char max ib = | c, 2 -> loop 1 (Scanning.store_char ib c max) (* Any other case is an error, *) | c, _ -> bad_input_char c in - loop 3 max;; + loop 3 max +;; let scan_String max ib = let rec loop s max = @@ -708,7 +760,8 @@ let scan_String max ib = | '\\', false -> loop false max | c, false -> loop false (Scanning.store_char ib c max) | _, _ -> loop false (scan_backslash_char (max - 1) ib) in - loop true max;; + loop true max +;; let scan_bool max ib = if max < 4 then bad_input "a boolean" else @@ -719,12 +772,14 @@ let scan_bool max ib = | 't' -> 4 | 'f' -> 5 | _ -> bad_input "a boolean" in - scan_string [] (min max m) ib;; + scan_string [] (min max m) ib +;; (* Reading char sets in %[...] conversions. *) type char_set = | Pos_set of string (* Positive (regular) set. *) - | Neg_set of string (* Negative (complementary) set. *);; + | Neg_set of string (* Negative (complementary) set. *) +;; (* Char sets are read as sub-strings in the format string. *) let read_char_set fmt i = @@ -750,14 +805,16 @@ let read_char_set fmt i = j, Neg_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) | _ -> let j = find_set i in - j, Pos_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i));; + j, Pos_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) +;; (* Char sets are now represented as bitvects that are represented as byte strings. *) (* Bit manipulations into bytes. *) let set_bit_of_byte byte idx b = - (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx)));; + (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx))) +;; let get_bit_of_byte byte idx = (byte lsr idx) land 1;; @@ -766,20 +823,23 @@ let set_bit_of_range r c b = let idx = c land 0x7 in let ydx = c lsr 3 in let byte = r.[ydx] in - r.[ydx] <- char_of_int (set_bit_of_byte (int_of_char byte) idx b);; + r.[ydx] <- char_of_int (set_bit_of_byte (int_of_char byte) idx b) +;; let get_bit_of_range r c = let idx = c land 0x7 in let ydx = c lsr 3 in let byte = r.[ydx] in - get_bit_of_byte (int_of_char byte) idx;; + get_bit_of_byte (int_of_char byte) idx +;; (* Char sets represented as bitvects represented as fixed length byte strings. *) (* Create a full or empty set of chars. *) let make_range bit = let c = char_of_int (if bit = 0 then 0 else 0xFF) in - String.make 32 c;; + String.make 32 c +;; (* Test if a char belongs to a set of chars. *) let get_char_in_range r c = get_bit_of_range r (int_of_char c);; @@ -809,14 +869,16 @@ let make_char_bit_vect bit set = set_bit_of_range r (int_of_char set.[i]) bit; loop bit true (succ i) in loop bit false 0; - r;; + r +;; (* Compute the predicate on chars corresponding to a char set. *) let make_pred bit set stp = let r = make_char_bit_vect bit set in List.iter (fun c -> set_bit_of_range r (int_of_char c) (bit_not bit)) stp; - (fun c -> get_char_in_range r c);; + (fun c -> get_char_in_range r c) +;; let make_setp stp char_set = match char_set with @@ -849,7 +911,8 @@ let make_setp stp char_set = if p2 = '-' then make_pred 0 set stp else (fun c -> if c != p1 && c != p2 && c != p3 then 1 else 0) | n -> make_pred 0 set stp - end;; + end +;; let setp_table = Hashtbl.create 7;; @@ -860,14 +923,16 @@ let add_setp stp char_set setp = let char_set_tbl = Hashtbl.create 3 in Hashtbl.add setp_table char_set char_set_tbl; char_set_tbl in - Hashtbl.add char_set_tbl stp setp;; + Hashtbl.add char_set_tbl stp setp +;; let find_setp stp char_set = try Hashtbl.find (Hashtbl.find setp_table char_set) stp with | Not_found -> let setp = make_setp stp char_set in add_setp stp char_set setp; - setp;; + setp +;; let scan_chars_in_char_set stp char_set max ib = let rec loop_pos1 cp1 max = @@ -937,13 +1002,15 @@ let scan_chars_in_char_set stp char_set max ib = | 3 when set.[1] != '-' -> loop_neg3 set.[0] set.[1] set.[2] max | n -> loop (find_setp stp char_set) max end in ignore_stoppers stp ib; - max;; + max +;; let get_count t ib = match t with | 'l' -> Scanning.line_count ib | 'n' -> Scanning.char_count ib - | _ -> Scanning.token_count ib;; + | _ -> Scanning.token_count ib +;; let rec skip_whites ib = let c = Scanning.peek_char ib in @@ -952,21 +1019,24 @@ let rec skip_whites ib = | ' ' | '\t' | '\n' | '\r' -> Scanning.invalidate_current_char ib; skip_whites ib | _ -> () - end;; + end +;; let list_iter_i f l = let rec loop i = function | [] -> () | [x] -> f i x (* Tail calling [f] *) | x :: xs -> f i x; loop (succ i) xs in - loop 0 l;; + loop 0 l +;; (* The global error report function for Scanf. *) let scanf_bad_input ib = function | Scan_failure s | Failure s -> let i = Scanning.char_count ib in bad_input (Printf.sprintf "scanf: bad input at char number %i: %s" i s) - | x -> raise x;; + | x -> raise x +;; let ascanf sc fmt = let ac = Tformat.ac_of_format fmt in @@ -979,7 +1049,7 @@ let ascanf sc fmt = Obj.magic (fun x y f -> sc fmt [| Obj.repr x; Obj.repr y; |] f) | 3 -> Obj.magic - (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f) + (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f) | nargs -> let rec loop i args = if i >= nargs then @@ -987,7 +1057,8 @@ let ascanf sc fmt = list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args; Obj.magic (fun f -> sc fmt a f) else Obj.magic (fun x -> loop (succ i) (x :: args)) in - loop 0 [];; + loop 0 [] +;; (* The [scan_format] main scanning function. It takes as arguments: @@ -1125,7 +1196,7 @@ let scan_format ib ef fmt rv f = incomplete_format bad_conversion conv fmt i in let mf = Sformat.sub fmt (Sformat.index_of_int i) (j - 2 - i) in (* Read the specified format string in the input buffer, - and check its correction. *) + and check its correction. *) let _x = scan_String max ib in let rf = token_string ib in if not (compatible_format_type rf mf) then format_mismatch rf mf else @@ -1155,11 +1226,13 @@ let scan_format ib ef fmt rv f = try snd (scan fmt 0 (fun () -> f) 0) with | (Scan_failure _ | Failure _ | End_of_file) as exc -> stack (delay ef ib) exc in - return v;; + return v +;; let mkscanf ib ef fmt = let sc = scan_format ib ef in - ascanf sc fmt;; + ascanf sc fmt +;; let kscanf ib ef fmt = mkscanf ib ef fmt;; @@ -1176,7 +1249,8 @@ let bscanf_format ib fmt f = let fmt1 = ignore (scan_String max_int ib); token_string ib in if not (compatible_format_type fmt1 fmt) then format_mismatch fmt1 fmt else - f (string_to_format fmt1);; + f (string_to_format fmt1) +;; let sscanf_format s fmt f = bscanf_format (Scanning.from_string s) fmt f;; @@ -1185,7 +1259,9 @@ let quote_string s = Buffer.add_char b '\"'; Buffer.add_string b s; Buffer.add_char b '\"'; - Buffer.contents b;; + Buffer.contents b +;; let format_from_string s fmt = - sscanf_format (quote_string s) fmt (fun x -> x);; + sscanf_format (quote_string s) fmt (fun x -> x) +;; |