summaryrefslogtreecommitdiffstats
path: root/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/scanf.ml66
-rw-r--r--stdlib/scanf.mli4
2 files changed, 42 insertions, 28 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml
index 278ef1cf4..7230075c6 100644
--- a/stdlib/scanf.ml
+++ b/stdlib/scanf.ml
@@ -38,7 +38,7 @@ val cautious_peek_char : scanbuf -> char;;
the input or tries to read one if none has ever been read. *)
val checked_peek_char : scanbuf -> char;;
-(** Same as above but always returns a valid char instead of a null
+(** Same as above but always returns a valid char instead of a null
char when the reading method of the input buffer has reached end of
file. *)
@@ -65,6 +65,10 @@ val token_count : scanbuf -> int;;
(** [Scanning.token_count scanbuf] returns the number of tokens read
so far from [scanbuf]. *)
+val eof : scanbuf -> bool;;
+(** [Scanning.eof scanbuf] returns the current value of the end of input
+ condition of the given buffer, no validity test is performed. *)
+
val end_of_input : scanbuf -> bool;;
(** [Scanning.end_of_input scanbuf] tests the end of input condition
of the given buffer. *)
@@ -105,7 +109,9 @@ let next_char ib =
ib.eof <- true;;
let cautious_peek_char ib =
- if ib.bof then begin next_char ib; ib.bof <- false end;
+ if ib.bof then begin
+ next_char ib;
+ if ib.char_count > 0 then ib.bof <- false end;
ib.cur_char;;
(* Returns a valid current char for the input buffer. In particular
@@ -119,8 +125,11 @@ let checked_peek_char ib =
c;;
let peek_char ib = ib.cur_char;;
-let end_of_input ib = ib.eof;;
+let eof ib = ib.eof;;
let beginning_of_input ib = ib.bof;;
+let end_of_input ib =
+ let c = cautious_peek_char ib in
+ ib.eof;;
let char_count ib = ib.char_count;;
let reset_token ib = Buffer.reset ib.tokbuf;;
@@ -214,13 +223,15 @@ let scanf_bad_input ib = function
let bad_format fmt i fc =
invalid_arg
(Printf.sprintf
- "scanf: bad format %c, at char number %i of format %s" fc i fmt);;
+ "scanf: bad format %c, at char number %i in format %s" fc i fmt);;
(* Checking that the current char is indeed one of range, then skip it. *)
let check_char_in ib range =
let ci = Scanning.checked_peek_char ib in
if List.mem ci range then Scanning.next_char ib else
- bad_input (Printf.sprintf "looking for one of %s, found %c" "a range" ci);;
+ let sr = String.concat "" (List.map (String.make 1) range) in
+ bad_input
+ (Printf.sprintf "looking for one of range %s, found %c" sr ci);;
(* Checking that [c] is indeed in the input, then skip it. *)
let check_char ib c =
@@ -252,7 +263,7 @@ let token_int_literal conv ib =
| 'b' -> "0b" ^ Scanning.token ib
| _ -> assert false in
let l = String.length tok in
- if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1);;
+ if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1);;
(* All the functions that convert a string to a number raise the exception
Failure when the conversion is not possible.
@@ -284,7 +295,7 @@ let token_int64 conv ib = int64_of_string (token_int_literal conv ib);;
(* The decimal case is optimized. *)
let rec scan_decimal_digits max ib =
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
match Scanning.checked_peek_char ib with
| '0' .. '9' as c ->
let max = Scanning.store_char ib c max in
@@ -293,7 +304,7 @@ let rec scan_decimal_digits max ib =
(* Other cases uses a predicate argument to scan_digits. *)
let rec scan_digits digitp max ib =
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
match Scanning.checked_peek_char ib with
| c when digitp c ->
let max = Scanning.store_char ib c max in
@@ -320,7 +331,7 @@ let scan_hexadecimal_digits =
(* Decimal integers. *)
let scan_unsigned_decimal_int max ib =
- if max = 0 || Scanning.end_of_input ib then bad_input "decimal digit" else
+ if max = 0 || Scanning.eof ib then bad_input "decimal digit" else
scan_decimal_digits max ib;;
let scan_sign max ib =
@@ -342,7 +353,7 @@ let scan_unsigned_int max ib =
match Scanning.checked_peek_char ib with
| '0' as c ->
let max = Scanning.store_char ib c max in
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
let c = Scanning.peek_char ib in
begin match c with
| 'x' | 'X' -> scan_hexadecimal_digits (Scanning.store_char ib c max) ib
@@ -353,7 +364,7 @@ let scan_unsigned_int max ib =
let scan_optionally_signed_int max ib =
let max = scan_sign max ib in
- if max = 0 || Scanning.end_of_input ib then bad_input "bad int" else
+ if max = 0 || Scanning.eof ib then bad_input "bad int" else
scan_unsigned_int max ib;;
let scan_int conv max ib =
@@ -369,12 +380,12 @@ let scan_int conv max ib =
(* Scanning floating point numbers. *)
(* Fractional part is optional and can be reduced to 0 digits. *)
let scan_frac_part max ib =
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
scan_unsigned_decimal_int max ib;;
(* Exp part is optional and can be reduced to 0 digits. *)
let scan_exp_part max ib =
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
let c = Scanning.peek_char ib in
match c with
| 'e' | 'E' as c ->
@@ -383,7 +394,7 @@ let scan_exp_part max ib =
let scan_float max ib =
let max = scan_optionally_signed_decimal_int max ib in
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
let c = Scanning.peek_char ib in
match c with
| '.' ->
@@ -397,7 +408,7 @@ let scan_float max ib =
characters has been read.*)
let scan_string stp max ib =
let rec loop max =
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
let c = Scanning.checked_peek_char ib in
if stp = [] then
match c with
@@ -445,7 +456,7 @@ let char_for_decimal_code c0 c1 c2 =
(* Called when encountering '\\' as starter of a char.
Stops before the corresponding '\''. *)
let scan_backslash_char max ib =
- if max = 0 || Scanning.end_of_input ib then bad_input "a char" else
+ if max = 0 || Scanning.eof ib then bad_input "a char" else
let c = Scanning.peek_char ib in
match c with
| '\\' | '\'' | '"' | 'n' | 't' | 'b' | 'r' (* '"' helping Emacs *) ->
@@ -465,7 +476,7 @@ let scan_backslash_char max ib =
let scan_Char max ib =
let rec loop s max =
- if max = 0 || Scanning.end_of_input ib then bad_input "a char" else
+ if max = 0 || Scanning.eof ib then bad_input "a char" else
let c = Scanning.checked_peek_char ib in
match c, s with
| '\'', 3 -> Scanning.next_char ib; loop 2 (max - 1)
@@ -478,7 +489,7 @@ let scan_Char max ib =
let scan_String max ib =
let rec loop s max =
- if max = 0 || Scanning.end_of_input ib then bad_input "a string" else
+ if max = 0 || Scanning.eof ib then bad_input "a string" else
let c = Scanning.checked_peek_char ib in
match c, s with
| '"', true (* '"' helping Emacs *) ->
@@ -490,21 +501,19 @@ let scan_String max ib =
| c, false -> loop false (Scanning.store_char ib c max)
| c, _ -> bad_input_char c
and skip_spaces s max =
- if max = 0 || Scanning.end_of_input ib then bad_input "a string" else
+ if max = 0 || Scanning.eof ib then bad_input "a string" else
let c = Scanning.checked_peek_char ib in
match c, s with
| '\n', true
| ' ', false ->
Scanning.next_char ib; skip_spaces false (max - 1)
| '\\', false -> loop false max
-(* | '\\', true*)
| c, false -> loop false (Scanning.store_char ib c max)
-(* | ' ', _ -> bad_input_char c*)
| _, _ -> loop false (scan_backslash_char (max - 1) ib) in
loop true max;;
let scan_bool max ib =
- if max < 4 || Scanning.end_of_input ib then bad_input "a boolean" else
+ if max < 4 || Scanning.eof ib then bad_input "a boolean" else
let m =
match Scanning.checked_peek_char ib with
| 't' -> 4
@@ -572,7 +581,7 @@ let scan_chars_in_char_set stp char_set max ib =
let setp = make_setp stp char_set in
let rec loop max =
let c = Scanning.cautious_peek_char ib in
- if max = 0 || Scanning.end_of_input ib then max else
+ if max = 0 || Scanning.eof ib then max else
if setp c then loop (Scanning.store_char ib c max) else
max in
let max = loop max in
@@ -583,9 +592,9 @@ let skip_whites ib =
let rec loop = function
| ' ' | '\t' | '\n' | '\r' ->
Scanning.next_char ib;
- if not (Scanning.end_of_input ib) then loop (Scanning.peek_char ib)
+ if not (Scanning.eof ib) then loop (Scanning.peek_char ib)
| _ -> () in
- if not (Scanning.end_of_input ib) then
+ if not (Scanning.eof ib) then
loop (Scanning.cautious_peek_char ib);;
(* Main scanning function:
@@ -632,7 +641,6 @@ let kscanf ib ef fmt f =
| _ -> scan_fmt_fixed_width false f i
and scan_fmt_fixed_width skip f i =
- if i > lim then bad_format fmt i '%' else
match fmt.[i] with
| '0' .. '9' as c ->
let rec read_width accu i =
@@ -656,8 +664,9 @@ let kscanf ib ef fmt f =
let c = Scanning.checked_peek_char ib in
scan_fmt (stack f c) (i + 1)
| 'c' | 'C' as conv ->
+ if max <> 1 && max <> max_int then bad_format fmt i conv else
let x =
- if conv = 'c' then scan_char max ib else scan_Char max ib in
+ if conv = 'c' then scan_char max ib else scan_Char max ib in
scan_fmt (stack f (token_char ib)) (i + 1)
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
let x = scan_int conv max ib in
@@ -698,6 +707,9 @@ let kscanf ib ef fmt f =
| 'N' ->
let x = Scanning.token_count ib in
scan_fmt (stack f x) (i + 1)
+ | '!' as c ->
+ if Scanning.end_of_input ib then scan_fmt f (i + 1)
+ else bad_input "end of input not found"
| c -> bad_format fmt i c
and scan_fmt_stoppers i =
diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli
index 3ef6688d8..3b9c1da41 100644
--- a/stdlib/scanf.mli
+++ b/stdlib/scanf.mli
@@ -118,7 +118,8 @@ val bscanf :
escaped characters follow the lexical conventions of Caml).
- [c]: reads a single character. To test the current input character
without reading it, specify a null field width, i.e. use
- specification [%0c].
+ specification [%0c]. Raise [Invalid_argument], if the field width
+ specification is greater than 1.
- [C]: reads a single delimited character (delimiters and special
escaped characters follow the lexical conventions of Caml).
- [f], [e], [E], [g], [G], [F]: reads an optionally signed
@@ -143,6 +144,7 @@ val bscanf :
[\[^\]\]] matches any character that is not [\]].
- [n]: applies [f] to the number of characters read so far.
- [N]: applies [f] to the number of tokens read so far.
+ - [!]: matches the end of input condition.
- [%]: matches one [%] character in the input.
Following the [%] character introducing a conversion, there may be