summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPierre Weis <Pierre.Weis@inria.fr>2006-04-05 11:49:07 +0000
committerPierre Weis <Pierre.Weis@inria.fr>2006-04-05 11:49:07 +0000
commit176b3c0da8acab2483256770b1a1e212b672885f (patch)
tree3dae46d2ec99ebddde3f7530e899e2aecb72bedd
parentb9b6e0fa2cb9155fccdfe066caea70ddd68a69f6 (diff)
Module Sformat is internal to printf. Better typing specifications of functions working with format strings.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@7374 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r--stdlib/format.ml46
-rw-r--r--stdlib/printf.ml149
-rw-r--r--stdlib/printf.mli39
-rw-r--r--stdlib/scanf.ml147
-rw-r--r--stdlib/scanf.mli2
-rw-r--r--stdlib/sys.ml2
6 files changed, 249 insertions, 136 deletions
diff --git a/stdlib/format.ml b/stdlib/format.ml
index 2fabff2e7..f4b519c8d 100644
--- a/stdlib/format.ml
+++ b/stdlib/format.ml
@@ -910,14 +910,16 @@ and set_tags =
**************************************************************)
+module Sformat = Printf.Sformat;;
+
(* Error messages when processing formats. *)
(* Trailer: giving up at character number ... *)
let giving_up mess fmt i =
- "fprintf: " ^ mess ^ " ``" ^ fmt ^ "'', \
+ "fprintf: " ^ mess ^ " ``" ^ Sformat.to_string fmt ^ "'', \
giving up at character number " ^ string_of_int i ^
- (if i < String.length fmt
- then " (" ^ String.make 1 fmt.[i] ^ ")."
+ (if i < Sformat.length fmt
+ then " (" ^ String.make 1 (Sformat.get fmt i) ^ ")."
else String.make 1 '.');;
(* When an invalid format deserves a special error explanation. *)
@@ -963,8 +965,6 @@ let implode_rev s0 = function
| [] -> s0
| l -> String.concat "" (List.rev (s0 :: l));;
-external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";;
-
(* [fprintf_out] is the printf-like function generator: given the
- [to_s] flag that tells if we are printing into a string,
- the [get_out] function that has to be called at the end of formatting,
@@ -975,8 +975,7 @@ external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";;
applications of [fprintf_out]. *)
let mkprintf to_s get_out =
let rec kprintf k fmt =
- let fmt = format_to_string fmt in
- let len = String.length fmt in
+ let len = Sformat.length fmt in
let kpr fmt v =
let ppf = get_out fmt in
@@ -996,13 +995,13 @@ let mkprintf to_s get_out =
let rec doprn n i =
if i >= len then Obj.magic (k ppf) else
- match fmt.[i] with
+ match Sformat.get fmt i with
| '%' ->
Printf.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
| '@' ->
let i = succ i in
if i >= len then invalid_format fmt i else
- begin match fmt.[i] with
+ begin match Sformat.get fmt i with
| '[' ->
do_pp_open_box ppf n (succ i)
| ']' ->
@@ -1065,7 +1064,7 @@ let mkprintf to_s get_out =
and get_int n i c =
if i >= len then invalid_integer fmt i else
- match fmt.[i] with
+ match Sformat.get fmt i with
| ' ' -> get_int n (succ i) c
| '%' ->
let cont_s n s i = c (format_int_of_string fmt i s) n i
@@ -1077,33 +1076,34 @@ let mkprintf to_s get_out =
| _ ->
let rec get j =
if j >= len then invalid_integer fmt j else
- match fmt.[j] with
+ match Sformat.get fmt j with
| '0' .. '9' | '-' -> get (succ j)
| _ ->
let size =
if j = i then size_of_int 0 else
- format_int_of_string fmt j (String.sub fmt i (j - i)) in
+ let s = Sformat.sub fmt i (j - i) in
+ format_int_of_string fmt j s in
c size n j in
get i
and skip_gt i =
if i >= len then invalid_format fmt i else
- match fmt.[i] with
+ match Sformat.get fmt i with
| ' ' -> skip_gt (succ i)
| '>' -> succ i
| _ -> invalid_format fmt i
and get_box_kind i =
if i >= len then Pp_box, i else
- match fmt.[i] with
+ match Sformat.get fmt i with
| 'h' ->
let i = succ i in
if i >= len then Pp_hbox, i else
- begin match fmt.[i] with
+ begin match Sformat.get fmt i with
| 'o' ->
let i = succ i in
if i >= len then format_invalid_arg "bad box format" fmt i else
- begin match fmt.[i] with
+ begin match Sformat.get fmt i with
| 'v' -> Pp_hovbox, succ i
| c ->
format_invalid_arg
@@ -1118,11 +1118,11 @@ let mkprintf to_s get_out =
and get_tag_name n i c =
let rec get accu n i j =
if j >= len
- then c (implode_rev (String.sub fmt i (j - i)) accu) n j else
- match fmt.[j] with
- | '>' -> c (implode_rev (String.sub fmt i (j - i)) accu) n j
+ then c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j else
+ match Sformat.get fmt j with
+ | '>' -> c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j
| '%' ->
- let s0 = String.sub fmt i (j - i) in
+ let s0 = Sformat.sub fmt i (j - i) in
let cont_s n s i = get (s :: s0 :: accu) n i i
and cont_a n printer arg i =
let s =
@@ -1146,7 +1146,7 @@ let mkprintf to_s get_out =
and do_pp_break ppf n i =
if i >= len then begin pp_print_space ppf (); doprn n i end else
- match fmt.[i] with
+ match Sformat.get fmt i with
| '<' ->
let rec got_nspaces nspaces n i =
get_int n i (got_offset nspaces)
@@ -1158,7 +1158,7 @@ let mkprintf to_s get_out =
and do_pp_open_box ppf n i =
if i >= len then begin pp_open_box_gen ppf 0 Pp_box; doprn n i end else
- match fmt.[i] with
+ match Sformat.get fmt i with
| '<' ->
let kind, i = get_box_kind (succ i) in
let got_size size n i =
@@ -1169,7 +1169,7 @@ let mkprintf to_s get_out =
and do_pp_open_tag ppf n i =
if i >= len then begin pp_open_tag ppf ""; doprn n i end else
- match fmt.[i] with
+ match Sformat.get fmt i with
| '<' ->
let got_name tag_name n i =
pp_open_tag ppf tag_name;
diff --git a/stdlib/printf.ml b/stdlib/printf.ml
index fe42ec861..1acb41b6e 100644
--- a/stdlib/printf.ml
+++ b/stdlib/printf.ml
@@ -13,14 +13,33 @@
(* $Id$ *)
-external format_int: string -> int -> string = "caml_format_int"
-external format_int32: string -> int32 -> string = "caml_int32_format"
+external format_float: string -> float -> string
+ = "caml_format_float"
+external format_int: string -> int -> string
+ = "caml_format_int"
+external format_int32: string -> int32 -> string
+ = "caml_int32_format"
external format_nativeint: string -> nativeint -> string
- = "caml_nativeint_format"
-external format_int64: string -> int64 -> string = "caml_int64_format"
-external format_float: string -> float -> string = "caml_format_float"
-
-external format_to_string: ('a, 'b, 'c, 'd) format4 -> string = "%identity"
+ = "caml_nativeint_format"
+external format_int64: string -> int64 -> string
+ = "caml_int64_format"
+
+module Sformat = struct
+ external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string
+ = "%identity"
+ external length : ('a, 'b, 'c, 'd) format4 -> int
+ = "%string_length"
+ external get : ('a, 'b, 'c, 'd) format4 -> int -> char
+ = "%string_safe_get"
+ external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char
+ = "%string_unsafe_get"
+(* external set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit
+ = "%string_safe_set"
+ external unsafe_set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit
+ = "%string_unsafe_set" *)
+ let sub fmt idx len = String.sub (unsafe_to_string fmt) idx len
+ let to_string fmt = sub fmt 0 (length fmt)
+end;;
type index;;
@@ -32,28 +51,33 @@ let succ_index = add_int_index 1;;
(* Litteral position are one-based (hence pred p instead of p). *)
let index_of_litteral_position p = index_of_int (pred p);;
-let bad_conversion fmt i c =
+let bad_conversion sfmt i c =
invalid_arg
("printf: bad conversion %" ^ String.make 1 c ^ ", at char number " ^
- string_of_int i ^ " in format string ``" ^ fmt ^ "''");;
+ string_of_int i ^ " in format string ``" ^ sfmt ^ "''");;
+
+let bad_conversion_format fmt i c =
+ bad_conversion (Sformat.to_string fmt) i c;;
let incomplete_format fmt =
invalid_arg
- ("printf: premature end of format string ``" ^ fmt ^ "''");;
+ ("printf: premature end of format string ``" ^
+ Sformat.to_string fmt ^ "''");;
(* Parses a format to return the specified length and the padding direction. *)
-let parse_format fmt =
+let parse_format sfmt =
let rec parse neg i =
- if i >= String.length fmt then (0, neg) else
- match String.unsafe_get fmt i with
+ if i >= String.length sfmt then (0, neg) else
+ match String.unsafe_get sfmt i with
| '1'..'9' ->
- (int_of_string (String.sub fmt i (String.length fmt - i - 1)),
+ (int_of_string
+ (String.sub sfmt i (String.length sfmt - i - 1)),
neg)
| '-' ->
parse true (succ i)
| _ ->
parse neg (succ i) in
- try parse false 1 with Failure _ -> bad_conversion fmt 0 's'
+ try parse false 1 with Failure _ -> bad_conversion sfmt 0 's'
(* Pad a (sub) string into a blank string of length [p],
on the right if [neg] is true, on the left otherwise. *)
@@ -68,18 +92,19 @@ let pad_string pad_char p neg s i len =
(* Format a string given a %s format, e.g. %40s or %-20s.
To do: ignore other flags (#, +, etc)? *)
-let format_string fmt s =
- let (p, neg) = parse_format fmt in
- pad_string ' ' p neg s 0 (String.length s)
+let format_string sfmt s =
+ let (p, neg) = parse_format sfmt in
+ pad_string ' ' p neg s 0 (String.length s);;
-(* Extract a %format from [fmt] between [start] and [stop] inclusive.
- '*' in the format are replaced by integers taken from the [widths] list. *)
+(* Extract a format string out of [fmt] between [start] and [stop] inclusive.
+ '*' in the format are replaced by integers taken from the [widths] list.
+ extract_format returns a string. *)
let extract_format fmt start stop widths =
let skip_positional_spec start =
- match String.unsafe_get fmt start with
+ match Sformat.unsafe_get fmt start with
| '0'..'9' ->
let rec skip_int_litteral i =
- match String.unsafe_get fmt i with
+ match Sformat.unsafe_get fmt i with
| '0'..'9' -> skip_int_litteral (succ i)
| '$' -> succ i
| _ -> start in
@@ -90,7 +115,7 @@ let extract_format fmt start stop widths =
Buffer.add_char b '%';
let rec fill_format i widths =
if i <= stop then
- match (String.unsafe_get fmt i, widths) with
+ match (Sformat.unsafe_get fmt i, widths) with
| ('*', h :: t) ->
Buffer.add_string b (string_of_int h);
let i = skip_positional_spec (succ i) in
@@ -102,10 +127,13 @@ let extract_format fmt start stop widths =
fill_format start (List.rev widths);
Buffer.contents b;;
-let format_int_with_conv conv fmt i =
+let extract_format_int conv fmt start stop widths =
+ let sfmt = extract_format fmt start stop widths in
match conv with
- | 'n' | 'N' -> fmt.[String.length fmt - 1] <- 'u'; format_int fmt i
- | _ -> format_int fmt i
+ | 'n' | 'N' ->
+ sfmt. [String.length sfmt - 1] <- 'u';
+ sfmt
+ | _ -> sfmt;;
(* Returns the position of the last character of the meta format
string, starting from position [i], inside a given format [fmt].
@@ -114,34 +142,36 @@ let format_int_with_conv conv fmt i =
%) (when [conv = '(']). Hence, [sub_format] returns the index of
the character ')' or '}' that ends the meta format, according to
the character [conv]. *)
-let sub_format incomplete_format bad_conversion conv fmt i =
- let len = String.length fmt in
+let sub_format incomplete_format bad_conversion_format conv fmt i =
+ let len = Sformat.length fmt in
let rec sub_fmt c i =
let close = if c = '(' then ')' else (* '{' *) '}' in
let rec sub j =
if j >= len then incomplete_format fmt else
- match fmt.[j] with
+ match Sformat.get fmt j with
| '%' -> sub_sub (succ j)
| _ -> sub (succ j)
and sub_sub j =
if j >= len then incomplete_format fmt else
- match fmt.[j] with
+ match Sformat.get fmt j with
| '(' | '{' as c ->
let j = sub_fmt c (succ j) in sub (succ j)
| '}' | ')' as c ->
- if c = close then j else bad_conversion fmt i c
+ if c = close then j else bad_conversion_format fmt i c
| _ -> sub (succ j) in
sub i in
sub_fmt conv i;;
-let sub_format_for_printf = sub_format incomplete_format bad_conversion;;
+let sub_format_for_printf conv =
+ sub_format incomplete_format bad_conversion_format conv;;
let iter_on_format_args fmt add_conv add_char =
- let lim = String.length fmt - 1 in
+
+ let lim = Sformat.length fmt - 1 in
let rec scan_flags skip i =
if i > lim then incomplete_format fmt else
- match String.unsafe_get fmt i with
+ match Sformat.unsafe_get fmt i with
| '*' -> scan_flags skip (add_conv skip i 'i')
| '$' -> scan_flags skip (succ i)
| '#' | '-' | ' ' | '+' -> scan_flags skip (succ i)
@@ -151,7 +181,7 @@ let iter_on_format_args fmt add_conv add_char =
| _ -> scan_conv skip i
and scan_conv skip i =
if i > lim then incomplete_format fmt else
- match String.unsafe_get fmt i with
+ match Sformat.unsafe_get fmt i with
| '%' | '!' -> succ i
| 's' | 'S' | '[' -> add_conv skip i 's'
| 'c' | 'C' -> add_conv skip i 'c'
@@ -162,7 +192,7 @@ let iter_on_format_args fmt add_conv add_char =
| 'l' | 'n' | 'L' as conv ->
let j = succ i in
if j > lim then add_conv skip i 'i' else begin
- match fmt.[j] with
+ match Sformat.get fmt j with
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' ->
add_char (add_conv skip i conv) 'i'
| c -> add_conv skip i 'i' end
@@ -172,7 +202,7 @@ let iter_on_format_args fmt add_conv add_char =
let j = sub_format_for_printf conv fmt i in
(* Add the meta specification anyway. *)
let rec loop i =
- if i < j - 1 then loop (add_char i fmt.[i]) in
+ if i < j - 1 then loop (add_char i (Sformat.get fmt i)) in
loop i;
scan_conv skip j
| '(' as conv ->
@@ -181,11 +211,11 @@ let iter_on_format_args fmt add_conv add_char =
anyway. *)
scan_fmt (add_conv skip i conv)
| '}' | ')' as conv -> add_conv skip i conv
- | conv -> bad_conversion fmt i conv
+ | conv -> bad_conversion_format fmt i conv
and scan_fmt i =
if i < lim then
- if fmt.[i] = '%'
+ if Sformat.get fmt i = '%'
then scan_fmt (scan_flags false (succ i))
else scan_fmt (succ i)
else i in
@@ -197,7 +227,7 @@ let iter_on_format_args fmt add_conv add_char =
It also checks the well-formedness of the format string.
For instance, [summarize_format_type "A number %d\n"] is "%i". *)
let summarize_format_type fmt =
- let len = String.length fmt in
+ let len = Sformat.length fmt in
let b = Buffer.create len in
let add_char i c = Buffer.add_char b c; succ i in
let add_conv skip i c =
@@ -278,10 +308,10 @@ type param_spec = Spec_none | Spec_index of index;;
problems: the type would be dependant of the {\em value} of an integer
argument to printf. *)
let scan_positional_spec fmt got_pos n i =
- match String.unsafe_get fmt i with
+ match Sformat.unsafe_get fmt i with
| '0'..'9' as d ->
let rec get_int_litteral accu j =
- match String.unsafe_get fmt j with
+ match Sformat.unsafe_get fmt j with
| '0'..'9' as d ->
get_int_litteral (10 * accu + (int_of_char d - 48)) (succ j)
| '$' ->
@@ -320,8 +350,8 @@ let get_index spec n =
"next pos" is the position in [fmt] of the first character following
the %format in [fmt]. *)
-(* Note: here, rather than test explicitly against [String.length fmt]
- to detect the end of the format, we use [String.unsafe_get] and
+(* Note: here, rather than test explicitly against [Sformat.length fmt]
+ to detect the end of the format, we use [Sformat.unsafe_get] and
rely on the fact that we'll get a "nul" character if we access
one past the end of the string. These "nul" characters are then
caught by the [_ -> bad_conversion] clauses below.
@@ -335,7 +365,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
scan_positional_spec fmt got_pos n i
and scan_flags spec n widths i =
- match String.unsafe_get fmt i with
+ match Sformat.unsafe_get fmt i with
| '*' ->
let got_pos wspec i =
let (width : int) = get_arg wspec n in
@@ -346,7 +376,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
| _ -> scan_conv spec n widths i
and scan_conv spec n widths i =
- match String.unsafe_get fmt i with
+ match Sformat.unsafe_get fmt i with
| '%' ->
cont_s n "%" (succ i)
| 's' | 'S' as conv ->
@@ -364,7 +394,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
cont_s (next_index spec n) s (succ i)
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' | 'N' as conv ->
let (x : int) = get_arg spec n in
- let s = format_int_with_conv conv (extract_format fmt pos i widths) x in
+ let s =
+ format_int (extract_format_int conv fmt pos i widths) x in
cont_s (next_index spec n) s (succ i)
| 'f' | 'e' | 'E' | 'g' | 'G' | 'F' as conv ->
let (x : float) = get_arg spec n in
@@ -387,7 +418,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
let printer = get_arg spec n in
cont_t (next_index spec n) printer (succ i)
| 'l' | 'n' | 'L' as conv ->
- begin match String.unsafe_get fmt (succ i) with
+ begin match Sformat.unsafe_get fmt (succ i) with
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' ->
let s =
match conv with
@@ -403,10 +434,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
cont_s (next_index spec n) s (i + 2)
| _ ->
let (x : int) = get_arg spec n in
- cont_s
- (next_index spec n)
- (format_int_with_conv 'n' (extract_format fmt pos i widths) x)
- (succ i)
+ let s = format_int (extract_format_int 'n' fmt pos i widths) x in
+ cont_s (next_index spec n) s (succ i)
end
| '!' -> cont_f n (succ i)
| '{' | '(' as conv (* ')' '}' *) ->
@@ -417,31 +446,31 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
(* Just print the format argument as a specification. *)
cont_s
(next_index spec n)
- (summarize_format_type (format_to_string xf))
+ (summarize_format_type xf)
j else
(* Use the format argument instead of the format specification. *)
cont_m (next_index spec n) xf j
| (* '(' *) ')' ->
cont_s n "" (succ i)
| conv ->
- bad_conversion fmt i conv in
+ bad_conversion_format fmt i conv in
scan_positional n [] (succ pos);;
let mkprintf str get_out outc outs flush k fmt =
- let fmt = format_to_string fmt in
+(* let fmt = Sformat.length fmt in*)
(* out is global to this invocation of pr, and must be shared by all its
recursive calls (fif) any. *)
let out = get_out fmt in
let rec pr k n fmt v =
- let len = String.length fmt in
+ let len = Sformat.length fmt in
let rec doprn n i =
if i >= len then Obj.magic (k out) else
- match String.unsafe_get fmt i with
+ match Sformat.unsafe_get fmt i with
| '%' -> scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
| c -> outc out c; doprn n (succ i)
and cont_s n s i =
@@ -461,8 +490,8 @@ let mkprintf str get_out outc outs flush k fmt =
and cont_f n i =
flush out; doprn n i
and cont_m n xf i =
- let m = add_int_index (nargs_of_format_type (format_to_string xf)) n in
- pr (Obj.magic (fun _ -> doprn m i)) n (format_to_string xf) v in
+ let m = add_int_index (nargs_of_format_type xf) n in
+ pr (Obj.magic (fun _ -> doprn m i)) n xf v in
doprn n 0 in
@@ -481,7 +510,7 @@ let kbprintf k b =
let bprintf b = kbprintf ignore b
let get_buff fmt =
- let len = 2 * String.length fmt in
+ let len = 2 * Sformat.length fmt in
Buffer.create len;;
let get_contents b =
diff --git a/stdlib/printf.mli b/stdlib/printf.mli
index fe4b8b2f8..d612f4ad3 100644
--- a/stdlib/printf.mli
+++ b/stdlib/printf.mli
@@ -142,15 +142,34 @@ type index;;
external index_of_int : int -> index = "%identity";;
-val scan_format : string -> 'a array -> index -> int ->
- (index -> string -> int -> 'b) ->
- (index -> 'c -> 'd -> int -> 'b) ->
- (index -> 'e -> int -> 'b) ->
- (index -> int -> 'b) ->
- (index -> ('h, 'i, 'j, 'k) format4 -> int -> 'b) -> 'b
+module Sformat : sig
+ external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string
+ = "%identity"
+ external length : ('a, 'b, 'c, 'd) format4 -> int
+ = "%string_length"
+ external get : ('a, 'b, 'c, 'd) format4 -> int -> char
+ = "%string_safe_get"
+ external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char
+ = "%string_unsafe_get"
+ val sub : ('a, 'b, 'c, 'd) format4 -> int -> int -> string
+ val to_string : ('a, 'b, 'c, 'd) format4 -> string
+end
+
+val scan_format : ('a, 'b, 'c, 'd) format4 ->
+ 'e array ->
+ index ->
+ int ->
+ (index -> string -> int -> 'f) ->
+ (index -> 'g -> 'h -> int -> 'f) ->
+ (index -> 'i -> int -> 'f) ->
+ (index -> int -> 'f) ->
+ (index -> ('j, 'k, 'l, 'm) format4 -> int -> 'f) -> 'f
val sub_format :
- (string -> int) -> (string -> int -> char -> int) ->
- char -> string -> int -> int
-val summarize_format_type : string -> string
-val kapr : (string -> Obj.t array -> 'a) -> string -> 'a
+ (('a, 'b, 'c, 'd) format4 -> int) ->
+ (('a, 'b, 'c, 'd) format4 -> int -> char -> int) ->
+ char -> ('a, 'b, 'c, 'd) format4 -> int -> int
+val summarize_format_type : ('a, 'b, 'c, 'd) format4 -> string
+val kapr :
+ (('a, 'b, 'c, 'd) format4 -> Obj.t array -> 'e) ->
+ ('a, 'b, 'c, 'd) format4 -> 'e
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml
index 030075ba3..a0a49c061 100644
--- a/stdlib/scanf.ml
+++ b/stdlib/scanf.ml
@@ -217,7 +217,11 @@ let from_function = create "function input";;
(* Perform bufferized input to improve efficiency. *)
let file_buffer_size = ref 1024;;
-let from_file_channel fname ic =
+(* To close a channel at end of input. *)
+let scan_close_at_end ic = close_in ic; raise End_of_file;;
+
+(* Scan from an input channel. *)
+let from_ic scan_close_ic fname ic =
let len = !file_buffer_size in
let buf = String.create len in
let i = ref 0 in
@@ -225,29 +229,90 @@ let from_file_channel fname ic =
let next () =
if !i < !lim then begin let c = buf.[!i] in incr i; c end else begin
lim := input ic buf 0 len;
- if !lim = 0 then raise End_of_file else begin
+ if !lim = 0 then scan_close_ic ic else begin
i := 1;
buf.[0]
end
end in
create fname next;;
-let from_file fname = from_file_channel fname (open_in fname);;
-let from_file_bin fname = from_file_channel fname (open_in_bin fname);;
+let from_ic_close_at_end = from_ic scan_close_at_end;;
-let from_input_channel fname ic =
- let next () = input_char ic in
- create fname next;;
+let from_file fname = from_ic_close_at_end fname (open_in fname);;
+let from_file_bin fname = from_ic_close_at_end fname (open_in_bin fname);;
+
+(* Input channel [ic] is not allocated here, hence it may be shared (two
+ functions of the program may successively read from it). Furthermore, the
+ user may define more than one scanning buffer reading from the same [ic]
+ channel.
+
+ However, we cannot prevent scanf to use one lookahead character if needed;
+ this implies that multiple functions alternatively scanning the same [ic]
+ channel will miss characters from time to time, due to unnoticed look ahead
+ characters, silently read from [ic] (hence no more available for reading)
+ and retained inside the scanning buffer for correct scanning from the same
+ scanning buffer. This phenomenon is even worse in case of multiple
+ definition of scanning buffers from the same [ic].
+
+ Hence, we do bufferize characters to create a scnning buffer from an input
+ channel in order to preserve the same semantics as other from_* functions
+ above: two successive calls to the scanner will work appropriately, since
+ the bufferized character (if any) will be retained inside the scanning
+ buffer from a call to the next one.
-let from_channel = from_input_channel "input channel";;
+ Otherwise, if we do not bufferize characters, we will loose the clearly
+ correct scanning behaviour even for the simple regular case, when we scan
+ the (possibly shared) channel [ic] using a unique function, while not
+ gaining anything for multiple functions reading from [ic] or multiple
+ allocation of scanning buffers reading from the same [ic].
-(* The scanning buffer reading from [stdin].*)
-let stdib = from_input_channel "stdin" stdin;;
+ A more ambitious fix could be to have a memo scanning buffer allocation
+ for reading from input channel not allocated from within Scanf. *)
+
+let scan_at_end ic = raise End_of_file;;
+
+let from_channel = from_ic scan_at_end "input channel";;
+
+(* The scanning buffer reading from [stdin].
+ One could try to define stdib as from_channel stdin,
+ but unfortunately the toplevel interaction would be wrong.
+ This is due to some kind of ``race condition'' when reading from stdin,
+ since the interactive compiler and scanf will simultaneously read the
+ material they need from stdin; then, confusion will result from what should
+ be read by the toplevel and what should be read by scanf.
+ This is even more complicated by the one character lookahead that scanf
+ is sometimes obliged to maintain: the lookahead character will be available
+ for the next (scanf) entry, seamingly coming from nowhere.
+ Also no End_of_file is raised when reading from stdin: if not enough
+ characters have been read, we simply ask to read more. *)
+(*let stdib =
+ let buf = ref ""
+ and len = ref 0 in
+ let mk_buff l =
+ buf := String.create l;
+ len := l in
+ let i = ref 0 in
+ let rec next () =
+ if !i < !len then begin let c = !buf.[!i] in incr i; c end else
+ let s = input_line stdin in
+ let ls = String.length s in
+ if ls > !len then mk_buff ls;
+ String.blit s 0 !buf 0 ls;
+ i := 0;
+ next () in
+ create "stdin" next;;
+*)
+let stdib = from_ic scan_at_end "stdin" stdin;;
end;;
(* Formatted input functions. *)
+module Sformat = Printf.Sformat;;
+
+external string_to_format :
+ string -> ('a, 'b, 'c, 'd) format4 = "%identity";;
+
(* Reporting errors. *)
exception Scan_failure of string;;
@@ -267,11 +332,12 @@ let bad_conversion fmt i c =
invalid_arg
(Printf.sprintf
"scanf: bad conversion %%%c, at char number %i \
- in format string ``%s''" c i fmt);;
+ in format string ``%s''" c i (Sformat.to_string fmt));;
let incomplete_format fmt =
invalid_arg
- (Printf.sprintf "scanf: premature end of format string ``%s''" fmt);;
+ (Printf.sprintf "scanf: premature end of format string ``%s''"
+ (Sformat.to_string fmt));;
let bad_float () = bad_input "no dot or exponent part found in float token";;
@@ -283,7 +349,8 @@ let format_mismatch fmt1 fmt2 ib =
(* Checking that 2 format string are type compatible. *)
let compatible_format_type fmt1 fmt2 =
- Printf.summarize_format_type fmt1 = Printf.summarize_format_type fmt2;;
+ Printf.summarize_format_type (string_to_format fmt1) =
+ Printf.summarize_format_type (string_to_format fmt2);;
(* Checking that [c] is indeed in the input, then skips it.
In this case, the character c has been explicitely specified in the
@@ -347,9 +414,12 @@ let token_float ib = float_of_string (Scanning.token ib);;
since those modules are not available to Scanf.
However, we can bind and use the corresponding primitives that are
available in the runtime. *)
-external nativeint_of_string: string -> nativeint = "caml_nativeint_of_string";;
-external int32_of_string : string -> int32 = "caml_int32_of_string";;
-external int64_of_string : string -> int64 = "caml_int64_of_string";;
+external nativeint_of_string : string -> nativeint
+ = "caml_nativeint_of_string";;
+external int32_of_string : string -> int32
+ = "caml_int32_of_string";;
+external int64_of_string : string -> int64
+ = "caml_int64_of_string";;
let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib);;
let token_int32 conv ib = int32_of_string (token_int_literal conv ib);;
@@ -648,29 +718,29 @@ type char_set =
(* Char sets are read as sub-strings in the format string. *)
let read_char_set fmt i =
- let lim = String.length fmt - 1 in
+ let lim = Sformat.length fmt - 1 in
let rec find_in_set j =
if j > lim then incomplete_format fmt else
- match fmt.[j] with
+ match Sformat.get fmt j with
| ']' -> j
| c -> find_in_set (j + 1)
and find_set i =
if i > lim then incomplete_format fmt else
- match fmt.[i] with
+ match Sformat.get fmt i with
| ']' -> find_in_set (i + 1)
| c -> find_in_set i in
if i > lim then incomplete_format fmt else
- match fmt.[i] with
+ match Sformat.get fmt i with
| '^' ->
let i = i + 1 in
let j = find_set i in
- j, Neg_set (String.sub fmt i (j - i))
+ j, Neg_set (Sformat.sub fmt i (j - i))
| _ ->
let j = find_set i in
- j, Pos_set (String.sub fmt i (j - i));;
+ j, Pos_set (Sformat.sub fmt i (j - i));;
(* Char sets are now represented as bitvects that are represented as
byte strings. *)
@@ -874,11 +944,6 @@ let rec skip_whites ib =
| _ -> ()
end;;
-external format_to_string :
- ('a, 'b, 'c, 'd) format4 -> string = "%identity";;
-external string_to_format :
- string -> ('a, 'b, 'c, 'd) format4 = "%identity";;
-
(* The [kscanf] main scanning function.
It takes as arguments:
- an input buffer [ib] from which to read characters,
@@ -899,8 +964,8 @@ external string_to_format :
aborts and applies the scanning buffer and a string that explains
the error to the error handling function [ef] (the error continuation). *)
let kscanf ib ef fmt f =
- let fmt = format_to_string fmt in
- let lim = String.length fmt - 1 in
+
+ let lim = Sformat.length fmt - 1 in
let return v = Obj.magic v () in
let delay f x () = f x in
@@ -909,7 +974,7 @@ let kscanf ib ef fmt f =
let rec scan_fmt f i =
if i > lim then f else
- match fmt.[i] with
+ match Sformat.get fmt i with
| ' ' -> skip_whites ib; scan_fmt f (i + 1)
| '%' ->
if i > lim then incomplete_format fmt else
@@ -917,13 +982,13 @@ let kscanf ib ef fmt f =
| '@' ->
let i = i + 1 in
if i > lim then incomplete_format fmt else begin
- check_char ib fmt.[i];
+ check_char ib (Sformat.get fmt i);
scan_fmt f (i + 1) end
| c -> check_char ib c; scan_fmt f (i + 1)
and scan_conversion skip max f i =
let stack = if skip then no_stack else stack in
- match fmt.[i] with
+ match Sformat.get fmt i with
| '%' as conv ->
check_char ib conv; scan_fmt f (i + 1)
| 'c' when max = 0 ->
@@ -961,14 +1026,14 @@ let kscanf ib ef fmt f =
| 'l' | 'n' | 'L' as conv ->
let i = i + 1 in
if i > lim then scan_fmt (stack f (get_count conv ib)) i else begin
- match fmt.[i] with
+ match Sformat.get fmt i with
(* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *)
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
let _x = scan_int_conv conv max ib in
(* Look back to the character that triggered the integer conversion
(this character is either 'l', 'n' or 'L'), to find the
conversion to apply to the integer token read. *)
- begin match fmt.[i - 1] with
+ begin match Sformat.get fmt (i - 1) with
| 'l' -> scan_fmt (stack f (token_int32 conv ib)) (i + 1)
| 'n' -> scan_fmt (stack f (token_nativeint conv ib)) (i + 1)
| _ -> scan_fmt (stack f (token_int64 conv ib)) (i + 1) end
@@ -985,14 +1050,14 @@ let kscanf ib ef fmt f =
| '0' .. '9' as conv ->
let rec read_width accu i =
if i > lim then accu, i else
- match fmt.[i] with
+ match Sformat.get fmt i with
| '0' .. '9' as c ->
let accu = 10 * accu + int_value_of_char c in
read_width accu (i + 1)
| _ -> accu, i in
let max, i = read_width (int_value_of_char conv) (i + 1) in
if i > lim then incomplete_format fmt else begin
- match fmt.[i] with
+ match Sformat.get fmt i with
| '.' ->
let p, i = read_width 0 (i + 1) in
scan_conversion skip (max + p + 1) f i
@@ -1001,7 +1066,7 @@ let kscanf ib ef fmt f =
let i = succ i in
let j =
Printf.sub_format incomplete_format bad_conversion conv fmt i + 1 in
- let mf = String.sub fmt i (j - i - 2) in
+ let mf = Sformat.sub fmt i (j - i - 2) in
let _x = scan_String max ib in
let rf = token_string ib in
if not (compatible_format_type mf rf)
@@ -1013,8 +1078,8 @@ let kscanf ib ef fmt f =
and scan_fmt_stoppers i =
if i > lim then i - 1, [] else
- match fmt.[i] with
- | '@' when i < lim -> let i = i + 1 in i, [fmt.[i]]
+ match Sformat.get fmt i with
+ | '@' when i < lim -> let i = i + 1 in i, [Sformat.get fmt i]
| '@' when i = lim -> incomplete_format fmt
| _ -> i - 1, [] in
@@ -1035,7 +1100,7 @@ let sscanf s = bscanf (Scanning.from_string s);;
let scanf fmt = bscanf Scanning.stdib fmt;;
let bscanf_format ib fmt f =
- let fmt = format_to_string fmt in
+ let fmt = Sformat.unsafe_to_string fmt in
let fmt1 = ignore (scan_String max_int ib); token_string ib in
if not (compatible_format_type fmt1 fmt) then
format_mismatch fmt1 fmt ib else
@@ -1044,4 +1109,4 @@ let bscanf_format ib fmt f =
let sscanf_format s fmt f = bscanf_format (Scanning.from_string s) fmt f;;
-let scan_format s fmt = sscanf_format s fmt (fun x -> x);;
+let format_from_string s fmt = sscanf_format s fmt (fun x -> x);;
diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli
index 646fcccdb..14cd949ed 100644
--- a/stdlib/scanf.mli
+++ b/stdlib/scanf.mli
@@ -269,7 +269,7 @@ val sscanf_format :
(('a, 'b, 'c, 'd) format4 -> 'e) -> 'e;;
(** Same as {!Scanf.bscanf}, but inputs from the given string. *)
-val scan_format :
+val format_from_string :
string -> ('a, 'b, 'c, 'd) format4 -> ('a, 'b, 'c, 'd) format4;;
(** Same as {!Scanf.sscanf_format}, but converts the given string to a format
string. *)
diff --git a/stdlib/sys.ml b/stdlib/sys.ml
index ab8694733..7ed1a5071 100644
--- a/stdlib/sys.ml
+++ b/stdlib/sys.ml
@@ -78,4 +78,4 @@ let catch_break on =
(* OCaml version string, must be in the format described in sys.mli. *)
-let ocaml_version = "3.10+dev5 (2006-04-05)";;
+let ocaml_version = "3.10+dev6 (2006-04-05)";;