summaryrefslogtreecommitdiffstats
path: root/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/scanf.ml230
1 files changed, 153 insertions, 77 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml
index 72a013a14..4d75c149a 100644
--- a/stdlib/scanf.ml
+++ b/stdlib/scanf.ml
@@ -104,7 +104,8 @@ val from_file : string -> scanbuf;;
val from_file_bin : string -> scanbuf;;
val from_function : (unit -> char) -> scanbuf;;
-end;;
+end
+;;
module Scanning : SCANNING = struct
@@ -121,7 +122,8 @@ type scanbuf = {
mutable get_next_char : unit -> char;
tokbuf : Buffer.t;
file_name : file_name;
-};;
+}
+;;
let null_char = '\000';;
@@ -141,7 +143,8 @@ let next_char ib =
ib.current_char <- c;
ib.current_char_is_valid <- false;
ib.eof <- true;
- c;;
+ c
+;;
let peek_char ib =
if ib.current_char_is_valid then ib.current_char else next_char ib;;
@@ -154,17 +157,21 @@ let peek_char ib =
let checked_peek_char ib =
let c = peek_char ib in
if ib.eof then raise End_of_file;
- c;;
+ c
+;;
let end_of_input ib =
ignore (peek_char ib);
- ib.eof;;
+ ib.eof
+;;
let eof ib = ib.eof;;
let beginning_of_input ib = ib.char_count = 0;;
let name_of_input ib = ib.file_name;;
-let char_count ib = ib.char_count;;
+let char_count ib =
+ if ib.current_char_is_valid then ib.char_count - 1 else ib.char_count
+;;
let line_count ib = ib.line_count;;
let reset_token ib = Buffer.reset ib.tokbuf;;
let invalidate_current_char ib = ib.current_char_is_valid <- false;;
@@ -174,19 +181,22 @@ let token ib =
let tok = Buffer.contents tokbuf in
Buffer.clear tokbuf;
ib.token_count <- succ ib.token_count;
- tok;;
+ tok
+;;
let token_count ib = ib.token_count;;
let skip_char ib max =
invalidate_current_char ib;
- max;;
+ max
+;;
let ignore_char ib max = skip_char ib (max - 1);;
let store_char ib c max =
Buffer.add_char ib.tokbuf c;
- ignore_char ib max;;
+ ignore_char ib max
+;;
let default_token_buffer_size = 1024;;
@@ -200,7 +210,8 @@ let create fname next = {
get_next_char = next;
tokbuf = Buffer.create default_token_buffer_size;
file_name = fname;
-};;
+}
+;;
let from_string s =
let i = ref 0 in
@@ -210,7 +221,8 @@ let from_string s =
let c = s.[!i] in
incr i;
c in
- create "string input" next;;
+ create "string input" next
+;;
let from_function = create "function input";;
@@ -276,7 +288,8 @@ let from_ic scan_close_ic fname ic =
buf.[0]
end
end in
- create fname next;;
+ create fname next
+;;
let from_ic_close_at_end = from_ic scan_close_at_end;;
@@ -302,15 +315,18 @@ let from_channel = from_ic scan_raise_at_end "input channel";;
characters have been read, we simply ask to read more. *)
let stdib = from_ic scan_raise_at_end "stdin" stdin;;
-end;;
+end
+;;
(* Formatted input functions. *)
type ('a, 'b, 'c, 'd) scanner =
- ('a, Scanning.scanbuf, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c;;
+ ('a, Scanning.scanbuf, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c
+;;
external string_to_format :
- string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity";;
+ string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity"
+;;
(* Reporting errors. *)
exception Scan_failure of string;;
@@ -319,7 +335,8 @@ let bad_input s = raise (Scan_failure s);;
let bad_input_char c = bad_input (String.make 1 c);;
let bad_input_escape c =
- bad_input (Printf.sprintf "illegal escape character %C" c);;
+ bad_input (Printf.sprintf "illegal escape character %C" c)
+;;
module Sformat = Printf.CamlinternalPr.Sformat;;
module Tformat = Printf.CamlinternalPr.Tformat;;
@@ -328,28 +345,35 @@ let bad_conversion fmt i c =
invalid_arg
(Printf.sprintf
"scanf: bad conversion %%%c, at char number %i \
- in format string ``%s''" c i (Sformat.to_string fmt));;
+ in format string ``%s''" c i (Sformat.to_string fmt))
+;;
let incomplete_format fmt =
invalid_arg
(Printf.sprintf "scanf: premature end of format string ``%s''"
- (Sformat.to_string fmt));;
+ (Sformat.to_string fmt))
+;;
-let bad_float () = bad_input "no dot or exponent part found in float token";;
+let bad_float () = bad_input "no dot or exponent part found in
+float token"
+;;
let character_mismatch_err c ci =
- Printf.sprintf "looking for %C, found %C" c ci;;
+ Printf.sprintf "looking for %C, found %C" c ci
+;;
let character_mismatch c ci =
- bad_input (character_mismatch_err c ci);;
+ bad_input (character_mismatch_err c ci)
+;;
let format_mismatch_err fmt1 fmt2 =
Printf.sprintf
- "format read ``%s'' does not match specification ``%s''" fmt1 fmt2;;
+ "format read ``%s'' does not match specification ``%s''" fmt1 fmt2
+;;
let format_mismatch fmt1 fmt2 = bad_input (format_mismatch_err fmt1 fmt2);;
-(* Checking that 2 format string are type compatible. *)
+(* Checking that 2 format strings are type compatible. *)
let compatible_format_type fmt1 fmt2 =
Tformat.summarize_format_type (string_to_format fmt1) =
Tformat.summarize_format_type (string_to_format fmt2);;
@@ -362,7 +386,8 @@ let compatible_format_type fmt1 fmt2 =
let check_char ib c =
let ci = Scanning.checked_peek_char ib in
if ci = c then Scanning.invalidate_current_char ib else
- character_mismatch c ci;;
+ character_mismatch c ci
+;;
(* Checks that the current char is indeed one of the stopper characters,
then skips it.
@@ -375,7 +400,8 @@ let ignore_stoppers stps ib =
if List.memq ci stps then Scanning.invalidate_current_char ib else
let sr = String.concat "" (List.map (String.make 1) stps) in
bad_input
- (Printf.sprintf "looking for one of range %S, found %C" sr ci);;
+ (Printf.sprintf "looking for one of range %S, found %C" sr ci)
+;;
(* Extracting tokens from ouput token buffer. *)
@@ -401,7 +427,8 @@ let token_int_literal conv ib =
| 'b' -> "0b" ^ Scanning.token ib
| _ -> assert false in
let l = String.length tok in
- if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1);;
+ if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1)
+;;
(* All the functions that convert a string to a number raise the exception
Failure when the conversion is not possible.
@@ -417,11 +444,14 @@ let token_float ib = float_of_string (Scanning.token ib);;
However, we can bind and use the corresponding primitives that are
available in the runtime. *)
external nativeint_of_string : string -> nativeint
- = "caml_nativeint_of_string";;
+ = "caml_nativeint_of_string"
+;;
external int32_of_string : string -> int32
- = "caml_int32_of_string";;
+ = "caml_int32_of_string"
+;;
external int64_of_string : string -> int64
- = "caml_int64_of_string";;
+ = "caml_int64_of_string"
+;;
let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib);;
let token_int32 conv ib = int32_of_string (token_int_literal conv ib);;
@@ -450,7 +480,8 @@ let rec scan_decimal_digits max ib =
| '_' ->
let max = Scanning.ignore_char ib max in
scan_decimal_digits max ib
- | _ -> max;;
+ | _ -> max
+;;
let scan_decimal_digits_plus max ib =
let c = Scanning.checked_peek_char ib in
@@ -458,7 +489,8 @@ let scan_decimal_digits_plus max ib =
| '0' .. '9' ->
let max = Scanning.store_char ib c max in
scan_decimal_digits max ib
- | c -> bad_input_char c;;
+ | c -> bad_input_char c
+;;
let scan_digits_plus digitp max ib =
(* To scan numbers from other bases, we use a predicate argument to
@@ -480,23 +512,27 @@ let scan_digits_plus digitp max ib =
if digitp c then
let max = Scanning.store_char ib c max in
scan_digits max
- else bad_input_char c;;
+ else bad_input_char c
+;;
let is_binary_digit = function
| '0' .. '1' -> true
- | _ -> false;;
+ | _ -> false
+;;
let scan_binary_int = scan_digits_plus is_binary_digit;;
let is_octal_digit = function
| '0' .. '7' -> true
- | _ -> false;;
+ | _ -> false
+;;
let scan_octal_int = scan_digits_plus is_octal_digit;;
let is_hexa_digit = function
| '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
- | _ -> false;;
+ | _ -> false
+;;
let scan_hexadecimal_int = scan_digits_plus is_hexa_digit;;
@@ -508,11 +544,13 @@ let scan_sign max ib =
match c with
| '+' -> Scanning.store_char ib c max
| '-' -> Scanning.store_char ib c max
- | c -> max;;
+ | c -> max
+;;
let scan_optionally_signed_decimal_int max ib =
let max = scan_sign max ib in
- scan_unsigned_decimal_int max ib;;
+ scan_unsigned_decimal_int max ib
+;;
(* Scan an unsigned integer that could be given in any (common) basis.
If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is
@@ -530,11 +568,13 @@ let scan_unsigned_int max ib =
| 'o' -> scan_octal_int (Scanning.store_char ib c max) ib
| 'b' -> scan_binary_int (Scanning.store_char ib c max) ib
| c -> scan_decimal_digits max ib end
- | c -> scan_unsigned_decimal_int max ib;;
+ | c -> scan_unsigned_decimal_int max ib
+;;
let scan_optionally_signed_int max ib =
let max = scan_sign max ib in
- scan_unsigned_int max ib;;
+ scan_unsigned_int max ib
+;;
let scan_int_conv conv max ib =
match conv with
@@ -544,7 +584,8 @@ let scan_int_conv conv max ib =
| 'o' -> scan_octal_int max ib
| 'u' -> scan_unsigned_decimal_int max ib
| 'x' | 'X' -> scan_hexadecimal_int max ib
- | c -> assert false;;
+ | c -> assert false
+;;
(* Scanning floating point numbers. *)
(* Fractional part is optional and can be reduced to 0 digits. *)
@@ -555,7 +596,8 @@ let scan_frac_part max ib =
match c with
| '0' .. '9' as c ->
scan_decimal_digits (Scanning.store_char ib c max) ib
- | _ -> max;;
+ | _ -> max
+;;
(* Exp part is optional and can be reduced to 0 digits. *)
let scan_exp_part max ib =
@@ -565,7 +607,8 @@ let scan_exp_part max ib =
match c with
| 'e' | 'E' as c ->
scan_optionally_signed_decimal_int (Scanning.store_char ib c max) ib
- | _ -> max;;
+ | _ -> max
+;;
(* Scan the integer part of a floating point number, (not using the
Caml lexical convention since the integer part can be empty):
@@ -573,7 +616,8 @@ let scan_exp_part max ib =
digits (e.g. -.1). *)
let scan_int_part max ib =
let max = scan_sign max ib in
- scan_decimal_digits max ib;;
+ scan_decimal_digits max ib
+;;
let scan_float max ib =
let max = scan_int_part max ib in
@@ -585,7 +629,8 @@ let scan_float max ib =
let max = Scanning.store_char ib c max in
let max = scan_frac_part max ib in
scan_exp_part max ib
- | c -> scan_exp_part max ib;;
+ | c -> scan_exp_part max ib
+;;
let scan_Float max ib =
let max = scan_optionally_signed_decimal_int max ib in
@@ -599,7 +644,8 @@ let scan_Float max ib =
scan_exp_part max ib
| 'e' | 'E' ->
scan_exp_part max ib
- | c -> bad_float ();;
+ | c -> bad_float ()
+;;
(* Scan a regular string: stops when encountering a space or one of the
characters in stp. It also stops when the maximum number of
@@ -615,18 +661,21 @@ let scan_string stp max ib =
| c -> loop (Scanning.store_char ib c max) else
if List.memq c stp then Scanning.skip_char ib max else
loop (Scanning.store_char ib c max) in
- loop max;;
+ loop max
+;;
(* Scan a char: peek strictly one character in the input, whatsoever. *)
let scan_char max ib =
- Scanning.store_char ib (Scanning.checked_peek_char ib) max;;
+ Scanning.store_char ib (Scanning.checked_peek_char ib) max
+;;
let char_for_backslash = function
| 'n' -> '\010'
| 'r' -> '\013'
| 'b' -> '\008'
| 't' -> '\009'
- | c -> c;;
+ | c -> c
+;;
(* The integer value corresponding to the facial value of a valid
decimal digit character. *)
@@ -639,7 +688,8 @@ let char_for_decimal_code c0 c1 c2 =
int_value_of_char c2 in
if c < 0 || c > 255
then bad_input (Printf.sprintf "bad char \\%c%c%c" c0 c1 c2)
- else char_of_int c;;
+ else char_of_int c
+;;
(* Called when encountering '\\' as starter of a char.
Stops before the corresponding '\''. *)
@@ -660,7 +710,8 @@ let scan_backslash_char max ib =
let c1 = get_digit () in
let c2 = get_digit () in
Scanning.store_char ib (char_for_decimal_code c0 c1 c2) (max - 2)
- | c -> bad_input_char c;;
+ | c -> bad_input_char c
+;;
let scan_Char max ib =
let rec loop s max =
@@ -681,7 +732,8 @@ let scan_Char max ib =
| c, 2 -> loop 1 (Scanning.store_char ib c max)
(* Any other case is an error, *)
| c, _ -> bad_input_char c in
- loop 3 max;;
+ loop 3 max
+;;
let scan_String max ib =
let rec loop s max =
@@ -708,7 +760,8 @@ let scan_String max ib =
| '\\', false -> loop false max
| c, false -> loop false (Scanning.store_char ib c max)
| _, _ -> loop false (scan_backslash_char (max - 1) ib) in
- loop true max;;
+ loop true max
+;;
let scan_bool max ib =
if max < 4 then bad_input "a boolean" else
@@ -719,12 +772,14 @@ let scan_bool max ib =
| 't' -> 4
| 'f' -> 5
| _ -> bad_input "a boolean" in
- scan_string [] (min max m) ib;;
+ scan_string [] (min max m) ib
+;;
(* Reading char sets in %[...] conversions. *)
type char_set =
| Pos_set of string (* Positive (regular) set. *)
- | Neg_set of string (* Negative (complementary) set. *);;
+ | Neg_set of string (* Negative (complementary) set. *)
+;;
(* Char sets are read as sub-strings in the format string. *)
let read_char_set fmt i =
@@ -750,14 +805,16 @@ let read_char_set fmt i =
j, Neg_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i))
| _ ->
let j = find_set i in
- j, Pos_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i));;
+ j, Pos_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i))
+;;
(* Char sets are now represented as bitvects that are represented as
byte strings. *)
(* Bit manipulations into bytes. *)
let set_bit_of_byte byte idx b =
- (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx)));;
+ (b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx)))
+;;
let get_bit_of_byte byte idx = (byte lsr idx) land 1;;
@@ -766,20 +823,23 @@ let set_bit_of_range r c b =
let idx = c land 0x7 in
let ydx = c lsr 3 in
let byte = r.[ydx] in
- r.[ydx] <- char_of_int (set_bit_of_byte (int_of_char byte) idx b);;
+ r.[ydx] <- char_of_int (set_bit_of_byte (int_of_char byte) idx b)
+;;
let get_bit_of_range r c =
let idx = c land 0x7 in
let ydx = c lsr 3 in
let byte = r.[ydx] in
- get_bit_of_byte (int_of_char byte) idx;;
+ get_bit_of_byte (int_of_char byte) idx
+;;
(* Char sets represented as bitvects represented as fixed length byte
strings. *)
(* Create a full or empty set of chars. *)
let make_range bit =
let c = char_of_int (if bit = 0 then 0 else 0xFF) in
- String.make 32 c;;
+ String.make 32 c
+;;
(* Test if a char belongs to a set of chars. *)
let get_char_in_range r c = get_bit_of_range r (int_of_char c);;
@@ -809,14 +869,16 @@ let make_char_bit_vect bit set =
set_bit_of_range r (int_of_char set.[i]) bit;
loop bit true (succ i) in
loop bit false 0;
- r;;
+ r
+;;
(* Compute the predicate on chars corresponding to a char set. *)
let make_pred bit set stp =
let r = make_char_bit_vect bit set in
List.iter
(fun c -> set_bit_of_range r (int_of_char c) (bit_not bit)) stp;
- (fun c -> get_char_in_range r c);;
+ (fun c -> get_char_in_range r c)
+;;
let make_setp stp char_set =
match char_set with
@@ -849,7 +911,8 @@ let make_setp stp char_set =
if p2 = '-' then make_pred 0 set stp else
(fun c -> if c != p1 && c != p2 && c != p3 then 1 else 0)
| n -> make_pred 0 set stp
- end;;
+ end
+;;
let setp_table = Hashtbl.create 7;;
@@ -860,14 +923,16 @@ let add_setp stp char_set setp =
let char_set_tbl = Hashtbl.create 3 in
Hashtbl.add setp_table char_set char_set_tbl;
char_set_tbl in
- Hashtbl.add char_set_tbl stp setp;;
+ Hashtbl.add char_set_tbl stp setp
+;;
let find_setp stp char_set =
try Hashtbl.find (Hashtbl.find setp_table char_set) stp with
| Not_found ->
let setp = make_setp stp char_set in
add_setp stp char_set setp;
- setp;;
+ setp
+;;
let scan_chars_in_char_set stp char_set max ib =
let rec loop_pos1 cp1 max =
@@ -937,13 +1002,15 @@ let scan_chars_in_char_set stp char_set max ib =
| 3 when set.[1] != '-' -> loop_neg3 set.[0] set.[1] set.[2] max
| n -> loop (find_setp stp char_set) max end in
ignore_stoppers stp ib;
- max;;
+ max
+;;
let get_count t ib =
match t with
| 'l' -> Scanning.line_count ib
| 'n' -> Scanning.char_count ib
- | _ -> Scanning.token_count ib;;
+ | _ -> Scanning.token_count ib
+;;
let rec skip_whites ib =
let c = Scanning.peek_char ib in
@@ -952,21 +1019,24 @@ let rec skip_whites ib =
| ' ' | '\t' | '\n' | '\r' ->
Scanning.invalidate_current_char ib; skip_whites ib
| _ -> ()
- end;;
+ end
+;;
let list_iter_i f l =
let rec loop i = function
| [] -> ()
| [x] -> f i x (* Tail calling [f] *)
| x :: xs -> f i x; loop (succ i) xs in
- loop 0 l;;
+ loop 0 l
+;;
(* The global error report function for Scanf. *)
let scanf_bad_input ib = function
| Scan_failure s | Failure s ->
let i = Scanning.char_count ib in
bad_input (Printf.sprintf "scanf: bad input at char number %i: %s" i s)
- | x -> raise x;;
+ | x -> raise x
+;;
let ascanf sc fmt =
let ac = Tformat.ac_of_format fmt in
@@ -979,7 +1049,7 @@ let ascanf sc fmt =
Obj.magic (fun x y f -> sc fmt [| Obj.repr x; Obj.repr y; |] f)
| 3 ->
Obj.magic
- (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f)
+ (fun x y z f -> sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f)
| nargs ->
let rec loop i args =
if i >= nargs then
@@ -987,7 +1057,8 @@ let ascanf sc fmt =
list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args;
Obj.magic (fun f -> sc fmt a f)
else Obj.magic (fun x -> loop (succ i) (x :: args)) in
- loop 0 [];;
+ loop 0 []
+;;
(* The [scan_format] main scanning function.
It takes as arguments:
@@ -1125,7 +1196,7 @@ let scan_format ib ef fmt rv f =
incomplete_format bad_conversion conv fmt i in
let mf = Sformat.sub fmt (Sformat.index_of_int i) (j - 2 - i) in
(* Read the specified format string in the input buffer,
- and check its correction. *)
+ and check its correction. *)
let _x = scan_String max ib in
let rf = token_string ib in
if not (compatible_format_type rf mf) then format_mismatch rf mf else
@@ -1155,11 +1226,13 @@ let scan_format ib ef fmt rv f =
try snd (scan fmt 0 (fun () -> f) 0) with
| (Scan_failure _ | Failure _ | End_of_file) as exc ->
stack (delay ef ib) exc in
- return v;;
+ return v
+;;
let mkscanf ib ef fmt =
let sc = scan_format ib ef in
- ascanf sc fmt;;
+ ascanf sc fmt
+;;
let kscanf ib ef fmt = mkscanf ib ef fmt;;
@@ -1176,7 +1249,8 @@ let bscanf_format ib fmt f =
let fmt1 = ignore (scan_String max_int ib); token_string ib in
if not (compatible_format_type fmt1 fmt) then
format_mismatch fmt1 fmt else
- f (string_to_format fmt1);;
+ f (string_to_format fmt1)
+;;
let sscanf_format s fmt f = bscanf_format (Scanning.from_string s) fmt f;;
@@ -1185,7 +1259,9 @@ let quote_string s =
Buffer.add_char b '\"';
Buffer.add_string b s;
Buffer.add_char b '\"';
- Buffer.contents b;;
+ Buffer.contents b
+;;
let format_from_string s fmt =
- sscanf_format (quote_string s) fmt (fun x -> x);;
+ sscanf_format (quote_string s) fmt (fun x -> x)
+;;