diff options
author | Pierre Weis <Pierre.Weis@inria.fr> | 2003-04-25 10:21:21 +0000 |
---|---|---|
committer | Pierre Weis <Pierre.Weis@inria.fr> | 2003-04-25 10:21:21 +0000 |
commit | e3c25d1b244f393eb17c468bddb240b4817b6b42 (patch) | |
tree | 1bfd1273168a0244dfe466e83283ec6d72c1ff46 /stdlib | |
parent | 1d52692ccf904c61c3a893e2b289bdd88801d4f1 (diff) |
Correcting the bug of large buffering when scanning files.
Introduction of two new functions Scanning.from_file and
Scanning.from_file_bin to alleviate this problem.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5506 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/scanf.ml | 15 | ||||
-rw-r--r-- | stdlib/scanf.mli | 25 |
2 files changed, 33 insertions, 7 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index bb37064f8..d1592c20a 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -75,6 +75,8 @@ val begin_of_input : scanbuf -> bool;; val from_string : string -> scanbuf;; val from_channel : in_channel -> scanbuf;; +val from_file : string -> scanbuf;; +val from_file_bin : string -> scanbuf;; val from_function : (unit -> char) -> scanbuf;; end;; @@ -136,6 +138,8 @@ let store_char ib c max = next_char ib; max - 1;; +let default_token_buffer_size = 1024;; + let create next = let ib = { bof = true; @@ -143,7 +147,7 @@ let create next = cur_char = '\000'; char_count = 0; get_next_char = next; - tokbuf = Buffer.create 1024; + tokbuf = Buffer.create default_token_buffer_size; token_count = 0; } in ib;; @@ -163,7 +167,7 @@ let from_function = create;; (* Perform bufferized input to improve efficiency. *) let file_buffer_size = ref 1024;; -let from_channel ic = +let from_file_channel ic = let len = !file_buffer_size in let buf = String.create len in let i = ref 0 in @@ -178,6 +182,13 @@ let from_channel ic = end in create next;; +let from_file fname = from_file_channel (open_in fname);; +let from_file_bin fname = from_file_channel (open_in_bin fname);; + +let from_channel ic = + let next () = input_char ic in + create next;; + let stdib = from_channel stdin;; (** The scanning buffer reading from [stdin].*) diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index 3a5a0ac65..e8639e7c4 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -35,17 +35,29 @@ val from_string : string -> scanbuf;; Reading starts from the first character in the string. The end-of-input condition is set when the end of the string is reached. *) -val from_channel : in_channel -> scanbuf;; -(** [Scanning.from_channel inchan] returns a scanning buffer which reads - from the input channel [inchan], at the current reading position. *) +val from_file : string -> scanbuf;; +(** Bufferized file reading in text mode. The efficient and usual + way to scan text mode files (in effect, [from_file] returns a + buffer that reads characters in large chunks, rather than one + character at a time as buffers returned by [from_channel] do). + [Scanning.from_file fname] returns a scanning buffer which reads + from the given file [fname] in text mode. *) + +val from_file_bin : string -> scanbuf;; +(** Bufferized file reading in binary mode. *) val from_function : (unit -> char) -> scanbuf;; (** [Scanning.from_function f] returns a scanning buffer with the given function as its reading method. When scanning needs one more character, the given function is called. - When the function has no more character to provide, it must set + When the function has no more character to provide, it must signal an end-of-input condition by raising the exception [End_of_file]. *) +val from_channel : in_channel -> scanbuf;; +(** [Scanning.from_channel inchan] returns a scanning buffer which reads + one character at a time from the input channel [inchan], starting at the + current reading position. *) + val end_of_input : scanbuf -> bool;; (** [Scanning.end_of_input scanbuf] tests the end of input condition of the given buffer. *) @@ -157,7 +169,10 @@ val bscanf : [ocamlyacc]-generated parsers. *) val fscanf : in_channel -> ('a, Scanning.scanbuf, 'b, 'b) format -> 'a -> 'b;; -(** Same as {!Scanf.bscanf}, but inputs from the given channel. *) +(** Same as {!Scanf.bscanf}, but inputs from the given channel. + If efficiency is a concern, when scanning a file [fname], + consider using [bscanf] in conjonction with fast bufferized reading, + as obtained by [bscanf (Scanning.from_file fname)]. *) val sscanf : string -> ('a, Scanning.scanbuf, 'b, 'b) format -> 'a -> 'b;; (** Same as {!Scanf.bscanf}, but inputs from the given string. *) |