diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/scanf.ml | 30 | ||||
-rw-r--r-- | stdlib/scanf.mli | 35 |
2 files changed, 41 insertions, 24 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index ac950e4cb..ba04244ef 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -482,18 +482,18 @@ let compatible_format_type fmt1 fmt2 = Tformat.summarize_format_type (string_to_format fmt2);; (* Checking that [c] is indeed in the input, then skips it. - In this case, the character c has been explicitly specified in the + In this case, the character [c] has been explicitly specified in the format as being mandatory in the input; hence we should fail with End_of_file in case of end_of_input. (Remember that Scan_failure is raised only when (we can prove by evidence) that the input does not match the format string given. We must thus differentiate End_of_file as an error due to lack of input, and Scan_failure which is due to provably wrong - input. I am not sure this is worth to burden: it is complex and somehow + input. I am not sure this is worth the burden: it is complex and somehow subliminal; should be clearer to fail with Scan_failure "Not enough input to complete scanning"!) That's why, waiting for a better solution, we use checked_peek_char here. - We are also careful to treat "\r\n" in the input as a end of line marker: it + We are also careful to treat "\r\n" in the input as an end of line marker: it always matches a '\n' specification in the input format string. *) let rec check_char ib c = let ci = Scanning.checked_peek_char ib in @@ -1451,20 +1451,34 @@ let scan_format ib ef fmt rv f = | _ -> scan_fmt ir (stack f (get_count conv0 ib)) i end | '(' | '{' as conv (* ')' '}' *) -> let i = succ i in - (* Find the static specification for the format to read. *) + (* Find [mf], the static specification for the format to read. *) let j = Tformat.sub_format incomplete_format bad_conversion conv fmt i in let mf = Sformat.sub fmt (Sformat.index_of_int i) (j - 2 - i) in - (* Read the specified format string in the input buffer, - and check its correctness. *) + (* Read [rf], the specified format string in the input buffer, + and check its correctness w.r.t. [mf]. *) let _x = scan_String width ib in let rf = token_string ib in if not (compatible_format_type rf mf) then format_mismatch rf mf else + (* Proceed according to the kind of metaformat found: + - %{ mf %} simply returns [rf] as the token read, + - %( mf %) returns [rf] as the first token read, then + returns a second token obtained by scanning the input with + format string [rf]. + Behaviour for %( mf %) is mandatory for sake of format string + typechecking specification. To get pure format string + substitution behaviour, you should use %_( mf %) that skips the + first (format string) token and hence properly substitutes [mf] by + [rf] in the format string argument. + *) (* For conversion %{%}, just return this format string as the token - read. *) + read and go on with the rest of the format string argument. *) if conv = '{' (* '}' *) then scan_fmt ir (stack f rf) j else - (* Or else, read according to the format string just read. *) + (* Or else, return this format string as the first token read; + then continue scanning using this format string to get + the following token read; + finally go on with the rest of the format string argument. *) let ir, nf = scan (string_to_format rf) ir (stack f rf) 0 in (* Return the format string read and the value just read, then go on with the rest of the format. *) diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index 1026ba6fe..113826fd6 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -125,7 +125,7 @@ type file_name = string;; val open_in : file_name -> in_channel;; (** [Scanning.open_in fname] returns a formatted input channel for bufferized - reading in text mode of file [fname]. + reading in text mode from file [fname]. Note: [open_in] returns a formatted input channel that efficiently reads @@ -137,7 +137,7 @@ val open_in : file_name -> in_channel;; val open_in_bin : file_name -> in_channel;; (** [Scanning.open_in_bin fname] returns a formatted input channel for - bufferized reading in binary mode of file [fname]. + bufferized reading in binary mode from file [fname]. @since 3.12.0 *) @@ -339,30 +339,33 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; function and applies it to the scanning buffer [ib] to read the next argument. The input function [ri] must therefore have type [Scanning.in_channel -> 'a] and the argument read has type ['a]. - - [\{ fmt %\}]: reads a format string argument. The format string + - [\{ fmt %\}]: reads a format string argument. The format string read must have the same type as the format string specification - [fmt]. For instance, ["%{ %i %}"] reads any format string that + [fmt]. For instance, ["%{ %i %}"] reads any format string that can read a value of type [int]; hence, if [s] is the string ["fmt:\"number is %u\""], then [Scanf.sscanf s "fmt: %{%i%}"] succeeds and returns the format string ["number is %u"]. - - [\( fmt %\)]: scanning format substitution. - Reads a format string and then goes on scanning with the format string - read, instead of using [fmt]. - The format string read must have the same type as the format string + - [\( fmt %\)]: scanning sub-format substitution. + Reads a format string [rf] in the input, then goes on scanning with + [rf] instead of scanning with [fmt]. + The format string [rf] must have the same type as the format string specification [fmt] that it replaces. For instance, ["%( %i %)"] reads any format string that can read a value of type [int]. - Returns the format string read, and the value read using the format - string read. + The conversion returns the format string read [rf], and then a value + read using [rf]. Hence, if [s] is the string ["\"%4d\"1234.00"], then [Scanf.sscanf s "%(%i%)" (fun fmt i -> fmt, i)] evaluates to [("%4d", 1234)]. - If the special flag [_] is used, the conversion discards the - format string read and only returns the value read with the format - string read. - Hence, if [s] is the string ["\"%4d\"1234.00"], then - [Scanf.sscanf s "%_(%i%)"] is simply equivalent to - [Scanf.sscanf "1234.00" "%4d"]. + + This behaviour is not mere format substitution, since the conversion + returns the format string read as additional argument. If you need + pure format substitution, use special flag [_] to discard the + extraneous argument: conversion [%_\( fmt %\)] reads a format string + [rf] and then behaves the same as format string [rf]. Hence, if [s] is + the string ["\"%4d\"1234.00"], then [Scanf.sscanf s "%_(%i%)"] is + simply equivalent to [Scanf.sscanf "1234.00" "%4d"]. + - [l]: returns the number of lines read so far. - [n]: returns the number of characters read so far. - [N] or [L]: returns the number of tokens read so far. |