diff options
-rw-r--r-- | stdlib/printf.ml | 14 | ||||
-rw-r--r-- | stdlib/printf.mli | 38 | ||||
-rw-r--r-- | stdlib/scanf.ml | 100 | ||||
-rw-r--r-- | stdlib/scanf.mli | 290 | ||||
-rw-r--r-- | typing/typecore.ml | 90 |
5 files changed, 287 insertions, 245 deletions
diff --git a/stdlib/printf.ml b/stdlib/printf.ml index 5508768dc..c55c64d36 100644 --- a/stdlib/printf.ml +++ b/stdlib/printf.ml @@ -217,7 +217,7 @@ let iter_on_format_args fmt add_conv add_char = and scan_conv skip i = if i > lim then incomplete_format fmt else match Sformat.unsafe_get fmt i with - | '%' | '!' | ',' -> succ i + | '%' | '@' | '!' | ',' -> succ i | 's' | 'S' | '[' -> add_conv skip i 's' | 'c' | 'C' -> add_conv skip i 'c' | 'd' | 'i' |'o' | 'u' | 'x' | 'X' | 'N' -> add_conv skip i 'i' @@ -505,8 +505,10 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = and scan_conv spec n widths i = match Sformat.unsafe_get fmt i with - | '%' -> - cont_s n "%" (succ i) + | '%' | '@' as c -> + cont_s n (String.make 1 c) (succ i) + | '!' -> cont_f n (succ i) + | ',' -> cont_s n "" (succ i) | 's' | 'S' as conv -> let (x : string) = get_arg spec n in let x = if conv = 's' then x else "\"" ^ String.escaped x ^ "\"" in @@ -515,6 +517,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = if i = succ pos then x else format_string (extract_format fmt pos i widths) x in cont_s (next_index spec n) s (succ i) + | '[' as conv -> + bad_conversion_format fmt i conv | 'c' | 'C' as conv -> let (x : char) = get_arg spec n in let s = @@ -546,6 +550,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = let n = Sformat.succ_index (get_index spec n) in let arg = get_arg Spec_none n in cont_a (next_index spec n) printer arg (succ i) + | 'r' as conv -> + bad_conversion_format fmt i conv | 't' -> let printer = get_arg spec n in cont_t (next_index spec n) printer (succ i) @@ -570,8 +576,6 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = let s = format_int (extract_format_int 'n' fmt pos i widths) x in cont_s (next_index spec n) s (succ i) end - | ',' -> cont_s n "" (succ i) - | '!' -> cont_f n (succ i) | '{' | '(' as conv (* ')' '}' *) -> let (xf : ('a, 'b, 'c, 'd, 'e, 'f) format6) = get_arg spec n in let i = succ i in diff --git a/stdlib/printf.mli b/stdlib/printf.mli index 83366a532..942ec49b0 100644 --- a/stdlib/printf.mli +++ b/stdlib/printf.mli @@ -20,7 +20,7 @@ val fprintf : out_channel -> ('a, out_channel, unit) format -> 'a [arg1] to [argN] according to the format string [format], and outputs the resulting string on the channel [outchan]. - The format is a character string which contains two types of + The format string is a character string which contains two types of objects: plain characters, which are simply copied to the output channel, and conversion specifications, each of which causes conversion and printing of arguments. @@ -31,20 +31,23 @@ val fprintf : out_channel -> ('a, out_channel, unit) format -> 'a In short, a conversion specification consists in the [%] character, followed by optional modifiers and a type which is made of one or - two characters. The types and their meanings are: + two characters. - - [d], [i], [n], [l], [L], or [N]: convert an integer argument to - signed decimal. - - [u]: convert an integer argument to unsigned decimal. + The types and their meanings are: + + - [d], [i]: convert an integer argument to signed decimal. + - [u], [n], [l], [L], or [N]: convert an integer argument to + unsigned decimal. Warning: [n], [l], [L], and [N] are + used for [scanf], and should not be used for [printf]. - [x]: convert an integer argument to unsigned hexadecimal, using lowercase letters. - [X]: convert an integer argument to unsigned hexadecimal, using uppercase letters. - [o]: convert an integer argument to unsigned octal. - [s]: insert a string argument. - - [S]: insert a string argument in OCaml syntax (double quotes, escapes). + - [S]: convert a string argument to OCaml syntax (double quotes, escapes). - [c]: insert a character argument. - - [C]: insert a character argument in OCaml syntax (single quotes, escapes). + - [C]: convert a character argument to OCaml syntax (single quotes, escapes). - [f]: convert a floating-point argument to decimal notation, in the style [dddd.ddd]. - [F]: convert a floating-point argument to OCaml syntax ([dddd.] @@ -54,37 +57,40 @@ val fprintf : out_channel -> ('a, out_channel, unit) format -> 'a - [g] or [G]: convert a floating-point argument to decimal notation, in style [f] or [e], [E] (whichever is more compact). - [B]: convert a boolean argument to the string [true] or [false] - - [b]: convert a boolean argument (for backward compatibility; do not - use in new programs). + - [b]: convert a boolean argument (deprecated; do not use in new + programs). - [ld], [li], [lu], [lx], [lX], [lo]: convert an [int32] argument to the format specified by the second letter (decimal, hexadecimal, etc). - [nd], [ni], [nu], [nx], [nX], [no]: convert a [nativeint] argument to the format specified by the second letter. - [Ld], [Li], [Lu], [Lx], [LX], [Lo]: convert an [int64] argument to the format specified by the second letter. - - [a]: user-defined printer. Takes two arguments and applies the + - [a]: user-defined printer. Take two arguments and apply the first one to [outchan] (the current output channel) and to the second argument. The first argument must therefore have type [out_channel -> 'b -> unit] and the second ['b]. The output produced by the function is inserted in the output of [fprintf] at the current point. - - [t]: same as [%a], but takes only one argument (with type + - [t]: same as [%a], but take only one argument (with type [out_channel -> unit]) and apply it to [outchan]. - [\{ fmt %\}]: convert a format string argument. The argument must have the same type as the internal format string [fmt]. - - [( fmt %)]: format string substitution. Takes a format string - argument and substitutes it to the internal format string [fmt] + - [( fmt %)]: format string substitution. Take a format string + argument and substitute it to the internal format string [fmt] to print following arguments. The argument must have the same type as the internal format string [fmt]. - [!]: take no argument and flush the output. - [%]: take no argument and output one [%] character. - - [,]: the no-op delimiter for conversion specifications. + - [\@]: take no argument and output one [\@] character. + - [,]: take no argument and do nothing. The optional [flags] are: - [-]: left-justify the output (default is right justification). - [0]: for numerical conversions, pad with zeroes instead of spaces. - - [+]: for numerical conversions, prefix number with a [+] sign if positive. - - space: for numerical conversions, prefix number with a space if positive. + - [+]: for signed numerical conversions, prefix number with a [+] + sign if positive. + - space: for signed numerical conversions, prefix number with a + space if positive. - [#]: request an alternate formatting style for numbers. The optional [width] is an integer indicating the minimal diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 2bfd45cc6..9c6ecef62 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -438,7 +438,7 @@ let int_of_width_opt = function ;; let int_of_prec_opt = function - | None -> 0 + | None -> max_int | Some prec -> prec ;; @@ -999,31 +999,51 @@ type char_set = | Neg_set of string (* Negative (complementary) set. *) ;; + (* Char sets are read as sub-strings in the format string. *) -let read_char_set fmt i = - let lim = Sformat.length fmt - 1 in +let scan_range fmt j = + + let len = Sformat.length fmt in + + let buffer = Buffer.create len in - let rec find_in_set j = - if j > lim then incomplete_format fmt else + let rec scan_closing j = + if j >= len then incomplete_format fmt else match Sformat.get fmt j with - | ']' -> j - | _ -> find_in_set (succ j) - - and find_set i = - if i > lim then incomplete_format fmt else - match Sformat.get fmt i with - | ']' -> find_in_set (succ i) - | _ -> find_in_set i in - - if i > lim then incomplete_format fmt else - match Sformat.get fmt i with - | '^' -> - let i = succ i in - let j = find_set i in - j, Neg_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) - | _ -> - let j = find_set i in - j, Pos_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) + | ']' -> j, Buffer.contents buffer + | '%' -> + let j = j + 1 in + if j >= len then incomplete_format fmt else + begin match Sformat.get fmt j with + | '%' | '@' as c -> + Buffer.add_char buffer c; + scan_closing (j + 1) + | c -> bad_conversion fmt j c + end + | c -> + Buffer.add_char buffer c; + scan_closing (j + 1) in + + let scan_first_pos j = + if j >= len then incomplete_format fmt else + match Sformat.get fmt j with + | ']' as c -> + Buffer.add_char buffer c; + scan_closing (j + 1) + | _ -> scan_closing j in + + let rec scan_first_neg j = + if j >= len then incomplete_format fmt else + match Sformat.get fmt j with + | '^' -> + let j = j + 1 in + let k, char_set = scan_first_pos j in + k, Neg_set char_set + | _ -> + let k, char_set = scan_first_pos j in + k, Pos_set char_set in + + scan_first_neg j ;; (* Char sets are now represented as bit vectors that are represented as @@ -1370,18 +1390,19 @@ let scan_format ib ef fmt rv f = let width = int_of_width_opt width_opt in let prec = int_of_prec_opt prec_opt in match Sformat.get fmt i with - | '%' as conv -> - check_char ib conv; scan_fmt ir f (succ i) + | '%' | '@' as c -> + check_char ib c; + scan_fmt ir f (succ i) | 's' -> - let i, stp = scan_fmt_stoppers (succ i) in + let i, stp = scan_indication (succ i) in let _x = scan_string stp width ib in scan_fmt ir (stack f (token_string ib)) (succ i) | 'S' -> let _x = scan_String width ib in scan_fmt ir (stack f (token_string ib)) (succ i) | '[' (* ']' *) -> - let i, char_set = read_char_set fmt (succ i) in - let i, stp = scan_fmt_stoppers (succ i) in + let i, char_set = scan_range fmt (succ i) in + let i, stp = scan_indication (succ i) in let _x = scan_chars_in_char_set stp char_set width ib in scan_fmt ir (stack f (token_string ib)) (succ i) | ('c' | 'C') when width = 0 -> @@ -1458,12 +1479,23 @@ let scan_format ib ef fmt rv f = | c -> bad_conversion fmt i c - and scan_fmt_stoppers i = - if i > lim then i - 1, [] else - match Sformat.get fmt i with - | '@' when i < lim -> let i = succ i in i, [Sformat.get fmt i] - | '@' when i = lim -> incomplete_format fmt - | _ -> i - 1, [] in + and scan_indication j = + if j > lim then j - 1, [] else + match Sformat.get fmt j with + | '@' -> + let k = j + 1 in + if k > lim then j - 1, [] else + begin match Sformat.get fmt k with + | '%' -> + let k = k + 1 in + if k > lim then j - 1, [] else + begin match Sformat.get fmt k with + | '%' | '@' as c -> k, [ c ] + | _c -> j - 1, [] + end + | c -> k, [ c ] + end + | _c -> j - 1, [] in scan_fmt in diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index 7ab0a23d1..53317d66d 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -232,21 +232,14 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; (** {6 Format string description} *) -(** The format string is a character string which contains three types of +(** The format is a character string which contains three types of objects: - plain characters, which are simply matched with the characters of the input (with a special case for space and line feed, see {!Scanf.space}), - conversion specifications, each of which causes reading and conversion of one argument for the function [f] (see {!Scanf.conversion}), - scanning indications to specify boundaries of tokens - (see scanning {!Scanf.indication}). - - As a special convention for format strings, the [\@] character introduces - an escape for both characters [\@] and [%]: in a format string, - [\@\@] and [\@%] are respectively equivalent to the plain characters [\@] - and [%]. - @since 3.13 -*) + (see scanning {!Scanf.indication}). *) (** {7:space The space character in format strings} *) @@ -269,157 +262,148 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; (** {7:conversion Conversion specifications in format strings} *) -(** Conversion specifications have the following form: - - [% \[flags\] \[width\] \[.precision\] type] - - In short, a conversion specification consists in the [%] character, - followed by optional modifiers, and a type which is made of one or - several characters. - - The types and their meanings are: - - - [d]: reads an optionally signed decimal integer. - - [i]: reads an optionally signed integer - (usual input conventions for decimal ([0-9]+), hexadecimal - ([0x[0-9a-f]+] and [0X[0-9A-F]+]), octal ([0o[0-7]+]), and binary - ([0b[0-1]+]) notations are understood). - - [u]: reads an unsigned decimal integer. - - [x] or [X]: reads an unsigned hexadecimal integer ([[0-9a-f]+] or [[0-9A-F]+]). - - [o]: reads an unsigned octal integer ([[0-7]+]). - - [s]: reads a string argument that spreads as much as possible, until - the following bounding conditions holds: - {ul - {- a whitespace has been found (see {!Scanf.space}),} - {- a scanning indication has been encountered - (see scanning {!Scanf.indication}),} - {- the end-of-input has been reached.} - } - Hence, the [%s] conversion always succeeds: it returns an empty - string, if the bounding condition holds when the scan begins. - - [S]: reads a delimited string argument (delimiters and special - escaped characters follow the lexical conventions of OCaml). - - [c]: reads a single character. To test the current input character - without reading it, specify a null field width, i.e. use - specification [%0c]. Raise [Invalid_argument], if the field width - specification is greater than 1. - - [C]: reads a single delimited character (delimiters and special - escaped characters follow the lexical conventions of OCaml). - - [f], [e], [E], [g], [G]: reads an optionally signed - floating-point number in decimal notation, in the style [dddd.ddd - e/E+-dd]. - - [F]: reads a floating point number according to the lexical - conventions of OCaml (hence the decimal point is mandatory if the - exponent part is not mentioned). - - [B]: reads a boolean argument ([true] or [false]). - - [b]: reads a boolean argument (for backward compatibility; do not use - in new programs). - - [ld], [li], [lu], [lx], [lX], [lo]: reads an [int32] argument to - the format specified by the second letter for regular integers. - - [nd], [ni], [nu], [nx], [nX], [no]: reads a [nativeint] argument to - the format specified by the second letter for regular integers. - - [Ld], [Li], [Lu], [Lx], [LX], [Lo]: reads an [int64] argument to - the format specified by the second letter for regular integers. - - [\[ range \]]: reads characters that matches one of the characters - mentioned in the range of characters [range] (or not mentioned in - it, if the range starts with [^]). Reads a [string] that can be - empty, if the next input character does not match the range. The set of - characters from [c1] to [c2] (inclusively) is denoted by [c1-c2]. - Hence, [%\[0-9\]] returns a string representing a decimal number - or an empty string if no decimal digit is found; similarly, - [%\[\\048-\\057\\065-\\070\]] returns a string of hexadecimal digits. - If a closing bracket appears in a range, it must occur as the - first character of the range (or just after the [^] in case of - range negation); hence [\[\]\]] matches a [\]] character and - [\[^\]\]] matches any character that is not [\]]. - - [r]: user-defined reader. Takes the next [ri] formatted input function and - applies it to the scanning buffer [ib] to read the next argument. The - input function [ri] must therefore have type [Scanning.in_channel -> 'a] and - the argument read has type ['a]. - - [\{ fmt %\}]: reads a format string argument. - The format string read must have the same type as the format string - specification [fmt]. - For instance, ["%{ %i %}"] reads any format string that can read a value of - type [int]; hence, if [s] is the string ["fmt:\"number is %u\""], then - [Scanf.sscanf s "fmt: %{%i%}"] succeeds and returns the format string - ["number is %u"]. - - [\( fmt %\)]: scanning format substitution. - Reads a format string and then goes on scanning with the format string - read, instead of using [fmt]. - The format string read must have the same type as the format string - specification [fmt] that it replaces. - For instance, ["%( %i %)"] reads any format string that can read a value - of type [int]. - Returns the format string read, and the value read using the format - string read. - Hence, if [s] is the string ["\"%4d\"1234.00"], then - [Scanf.sscanf s "%(%i%)" (fun fmt i -> fmt, i)] evaluates to - [("%4d", 1234)]. - If the special flag [_] is used, the conversion discards the - format string read and only returns the value read with the format - string read. - Hence, if [s] is the string ["\"%4d\"1234.00"], then - [Scanf.sscanf s "%_(%i%)"] is simply equivalent to - [Scanf.sscanf "1234.00" "%4d"]. - - [l]: returns the number of lines read so far. - - [n]: returns the number of characters read so far. - - [N] or [L]: returns the number of tokens read so far. - - [!]: matches the end of input condition. - - [%]: matches one [%] character in the input. - - [,]: the no-op delimiter for conversion specifications. - - Following the [%] character that introduces a conversion, there may be - the special flag [_]: the conversion that follows occurs as usual, - but the resulting value is discarded. - For instance, if [f] is the function [fun i -> i + 1], and [s] is the - string ["x = 1"], then [Scanf.sscanf s "%_s = %i" f] returns [2]. - - The optional [width] is an integer literal indicating the maximal width - of the token to read. - For instance, [%6d] reads an integer, having at most 6 decimal digits; - [%4f] reads a float with at most 4 characters; and [%8\[\\000-\\255\]] - returns the next 8 characters (or all the characters still available, - if fewer than 8 characters are available in the input). - - The optional [precision] is a dot [.] followed by an integer literal - indicating the maximum number of digits that follow the decimal point in - the [%f], [%e], and [%E] conversions. For instance, [%.4f] reads a - [float] with at most 4 fractional digits. - - Notes: - - - as mentioned above, the [%s] conversion always succeeds, even if there is - nothing to read in the input: in this case, it simply returns [""]. - - - in addition to the relevant digits, ['_'] characters may appear - inside numbers (this is reminiscent to the usual OCaml lexical - conventions). If stricter scanning is desired, use the range - conversion facility instead of the number conversions. - - - the [scanf] facility is not intended for heavy duty lexical - analysis and parsing. If it appears not expressive enough for your - needs, several alternative exists: regular expressions (module - [Str]), stream parsers, [ocamllex]-generated lexers, - [ocamlyacc]-generated parsers. *) +(** Conversion specifications consist in the [%] character, followed by + an optional flag, an optional field width, and followed by one or + two conversion characters. The conversion characters and their + meanings are: + + - [d]: reads an optionally signed decimal integer. + - [i]: reads an optionally signed integer + (usual input conventions for decimal ([0-9]+), hexadecimal + ([0x[0-9a-f]+] and [0X[0-9A-F]+]), octal ([0o[0-7]+]), and binary + ([0b[0-1]+]) notations are understood). + - [u]: reads an unsigned decimal integer. + - [x] or [X]: reads an unsigned hexadecimal integer ([[0-9a-fA-F]+]). + - [o]: reads an unsigned octal integer ([[0-7]+]). + - [s]: reads a string argument that spreads as much as possible, until the + following bounding condition holds: {ul + {- a whitespace has been found (see {!Scanf.space}),} + {- a scanning indication (see scanning {!Scanf.indication}) has been + encountered,} + {- the end-of-input has been reached.}} + Hence, this conversion always succeeds: it returns an empty + string, if the bounding condition holds when the scan begins. + - [S]: reads a delimited string argument (delimiters and special + escaped characters follow the lexical conventions of Caml). + - [c]: reads a single character. To test the current input character + without reading it, specify a null field width, i.e. use + specification [%0c]. Raise [Invalid_argument], if the field width + specification is greater than 1. + - [C]: reads a single delimited character (delimiters and special + escaped characters follow the lexical conventions of Caml). + - [f], [e], [E], [g], [G]: reads an optionally signed + floating-point number in decimal notation, in the style [dddd.ddd + e/E+-dd]. + - [F]: reads a floating point number according to the lexical + conventions of Caml (hence the decimal point is mandatory if the + exponent part is not mentioned). + - [B]: reads a boolean argument ([true] or [false]). + - [b]: reads a boolean argument (for backward compatibility; do not use + in new programs). + - [ld], [li], [lu], [lx], [lX], [lo]: reads an [int32] argument to + the format specified by the second letter for regular integers. + - [nd], [ni], [nu], [nx], [nX], [no]: reads a [nativeint] argument to + the format specified by the second letter for regular integers. + - [Ld], [Li], [Lu], [Lx], [LX], [Lo]: reads an [int64] argument to + the format specified by the second letter for regular integers. + - [\[ range \]]: reads characters that matches one of the characters + mentioned in the range of characters [range] (or not mentioned in + it, if the range starts with [^]). Reads a [string] that can be + empty, if the next input character does not match the range. The set of + characters from [c1] to [c2] (inclusively) is denoted by [c1-c2]. + Hence, [%\[0-9\]] returns a string representing a decimal number + or an empty string if no decimal digit is found; similarly, + [%\[\\048-\\057\\065-\\070\]] returns a string of hexadecimal digits. + If a closing bracket appears in a range, it must occur as the + first character of the range (or just after the [^] in case of + range negation); hence [\[\]\]] matches a [\]] character and + [\[^\]\]] matches any character that is not [\]]. + Use [%%] and [%\@] to include a [%] or a [\@] in a range. + - [r]: user-defined reader. Takes the next [ri] formatted input function and + applies it to the scanning buffer [ib] to read the next argument. The + input function [ri] must therefore have type [Scanning.in_channel -> 'a] and + the argument read has type ['a]. + - [\{ fmt %\}]: reads a format string argument. + The format string read must have the same type as the format string + specification [fmt]. + For instance, ["%{ %i %}"] reads any format string that can read a value of + type [int]; hence, if [s] is the string ["fmt:\"number is %u\""], then + [Scanf.sscanf s "fmt: %{%i%}"] succeeds and returns the format string + ["number is %u"]. + - [\( fmt %\)]: scanning format substitution. + Reads a format string and then goes on scanning with the format string + read, instead of using [fmt]. + The format string read must have the same type as the format string + specification [fmt] that it replaces. + For instance, ["%( %i %)"] reads any format string that can read a value + of type [int]. + Returns the format string read, and the value read using the format + string read. + Hence, if [s] is the string ["\"%4d\"1234.00"], then + [Scanf.sscanf s "%(%i%)" (fun fmt i -> fmt, i)] evaluates to + [("%4d", 1234)]. + If the special flag [_] is used, the conversion discards the + format string read and only returns the value read with the format + string read. + Hence, if [s] is the string ["\"%4d\"1234.00"], then + [Scanf.sscanf s "%_(%i%)"] is simply equivalent to + [Scanf.sscanf "1234.00" "%4d"]. + - [l]: returns the number of lines read so far. + - [n]: returns the number of characters read so far. + - [N] or [L]: returns the number of tokens read so far. + - [!]: matches the end of input condition. + - [%]: matches one [%] character in the input. + - [\@]: matches one [\@] character in the input. + - [,]: does nothing. + + Following the [%] character that introduces a conversion, there may be + the special flag [_]: the conversion that follows occurs as usual, + but the resulting value is discarded. + For instance, if [f] is the function [fun i -> i + 1], and [s] is the + string ["x = 1"], then [Scanf.sscanf s "%_s = %i" f] returns [2]. + + The field width is composed of an optional integer literal + indicating the maximal width of the token to read. + For instance, [%6d] reads an integer, having at most 6 decimal digits; + [%4f] reads a float with at most 4 characters; and [%8[\\000-\\255]] + returns the next 8 characters (or all the characters still available, + if fewer than 8 characters are available in the input). + + Notes: + + - as mentioned above, a [%s] conversion always succeeds, even if there is + nothing to read in the input: in this case, it simply returns [""]. + + - in addition to the relevant digits, ['_'] characters may appear + inside numbers (this is reminiscent to the usual Caml lexical + conventions). If stricter scanning is desired, use the range + conversion facility instead of the number conversions. + + - the [scanf] facility is not intended for heavy duty lexical + analysis and parsing. If it appears not expressive enough for your + needs, several alternative exists: regular expressions (module + [Str]), stream parsers, [ocamllex]-generated lexers, + [ocamlyacc]-generated parsers. *) (** {7:indication Scanning indications in format strings} *) (** Scanning indications appear just after the string conversions [%s] - and [%\[ range \]] to delimit the end of the token. A scanning + and [%[ range ]] to delimit the end of the token. A scanning indication is introduced by a [\@] character, followed by some - literal character [c]. It means that the string token should end + plain character [c]. It means that the string token should end just before the next matching [c] (which is skipped). If no [c] character is encountered, the string token spreads as much as possible. For instance, ["%s@\t"] reads a string up to the next - tab character or up to the end of input. - - When it does not introduce a scanning indication, the [\@] character - introduces an escape for the next character: [\@c] is treated as a plain - [c] character. + tab character or to the end of input. If a [\@] character appears + anywhere else in the format string, it is treated as a plain character. Note: - - the scanning indications introduce slight differences in the syntax of + - As usual in format strings, [%] characters must be escaped using [%%] + and [%\@] is equivalent to [\@]; this rule still holds within range + specifications and scanning indications. + For instance, ["%s@%%"] reads a string up to the next [%] character. + - The scanning indications introduce slight differences in the syntax of [Scanf] format strings, compared to those used for the [Printf] module. However, the scanning indications are similar to those used in the [Format] module; hence, when producing formatted text to be scanned @@ -513,10 +497,6 @@ val unescaped : string -> string (** Return a copy of the argument with escape sequences, following the lexical conventions of OCaml, replaced by their corresponding special characters. If there is no escape sequence in the - argument, still return a copy, contrary to String.escaped. *) - -(* - Local Variables: - compile-command: "cd ..; make world" - End: + argument, still return a copy, contrary to String.escaped. + @since 3.13.0 *) diff --git a/typing/typecore.ml b/typing/typecore.ml index cac41a6f2..1db16e0c9 100644 --- a/typing/typecore.ml +++ b/typing/typecore.ml @@ -908,7 +908,7 @@ and is_nonexpansive_opt = function (* Typing format strings for printing or reading. - This format strings are used by functions in modules Printf, Format, and + These format strings are used by functions in modules Printf, Format, and Scanf. (Handling of * modifiers contributed by Thorsten Ohl.) *) @@ -927,25 +927,6 @@ let type_format loc fmt = let incomplete_format fmt = raise (Error (loc, Incomplete_format fmt)) in - let range_closing_index fmt i = - - let len = String.length fmt in - let find_closing j = - if j >= len then incomplete_format fmt else - try String.index_from fmt j ']' with - | Not_found -> incomplete_format fmt in - let skip_pos j = - if j >= len then incomplete_format fmt else - match fmt.[j] with - | ']' -> find_closing (j + 1) - | c -> find_closing j in - let rec skip_neg j = - if j >= len then incomplete_format fmt else - match fmt.[j] with - | '^' -> skip_pos (j + 1) - | c -> skip_pos j in - find_closing (skip_neg (i + 1)) in - let rec type_in_format fmt = let len = String.length fmt in @@ -964,14 +945,7 @@ let type_format loc fmt = else incomplete_format fmt else match fmt.[i] with | '%' -> scan_opts i (i + 1) - | '@' -> skip_indication (i + 1) | _ -> scan_format (i + 1) - and skip_indication i = - if i >= len then incomplete_format fmt else - match fmt.[i] with - | '@' | '%' -> scan_format (i + 1) - | _ -> scan_format i - and scan_opts i j = if j >= len then incomplete_format fmt else match fmt.[j] with @@ -1002,6 +976,48 @@ let type_format loc fmt = match fmt.[j] with | '.' -> scan_width_or_prec_value scan_conversion i (j + 1) | _ -> scan_conversion i j + and scan_indication j = + if j >= len then j - 1 else + match fmt.[j] with + | '@' -> + let k = j + 1 in + if k >= len then j - 1 else + begin match fmt.[k] with + | '%' -> + let k = k + 1 in + if k >= len then j - 1 else + begin match fmt.[k] with + | '%' | '@' -> k + | _c -> j - 1 + end + | _c -> k + end + | _c -> j - 1 + and scan_range j = + let rec scan_closing j = + if j >= len then incomplete_format fmt else + match fmt.[j] with + | ']' -> j + | '%' -> + let j = j + 1 in + if j >= len then incomplete_format fmt else + begin match fmt.[j] with + | '%' | '@' -> scan_closing (j + 1) + | c -> bad_conversion fmt j c + end + | c -> scan_closing (j + 1) in + let scan_first_pos j = + if j >= len then incomplete_format fmt else + match fmt.[j] with + | ']' -> scan_closing (j + 1) + | c -> scan_closing j in + let rec scan_first_neg j = + if j >= len then incomplete_format fmt else + match fmt.[j] with + | '^' -> scan_first_pos (j + 1) + | c -> scan_first_pos j in + + scan_first_neg j and conversion j ty_arg = let ty_uresult, ty_result = scan_format (j + 1) in @@ -1021,13 +1037,16 @@ let type_format loc fmt = and scan_conversion i j = if j >= len then incomplete_format fmt else match fmt.[j] with - | '%' | '!' | ',' -> scan_format (j + 1) - | 's' | 'S' -> conversion j Predef.type_string + | '%' | '@' | '!' | ',' -> scan_format (j + 1) + | 's' | 'S' -> + let j = scan_indication (j + 1) in + conversion j Predef.type_string | '[' -> - let j = range_closing_index fmt j in + let j = scan_range (j + 1) in + let j = scan_indication (j + 1) in conversion j Predef.type_string | 'c' | 'C' -> conversion j Predef.type_char - | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' | 'N' -> + | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' | 'N' -> conversion j Predef.type_int | 'f' | 'e' | 'E' | 'g' | 'G' | 'F' -> conversion j Predef.type_float | 'B' | 'b' -> conversion j Predef.type_bool @@ -1056,7 +1075,7 @@ let type_format loc fmt = let j = j + 1 in if j >= len then conversion (j - 1) Predef.type_int else begin match fmt.[j] with - | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> + | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' -> let ty_arg = match c with | 'l' -> Predef.type_int32 @@ -1085,9 +1104,10 @@ let type_format loc fmt = let ty_ureader, ty_args = scan_format 0 in newty (Tconstr - (Predef.path_format6, - [ty_args; ty_input; ty_aresult; ty_ureader; ty_uresult; ty_result], - ref Mnil)) in + (Predef.path_format6, + [ ty_args; ty_input; ty_aresult; + ty_ureader; ty_uresult; ty_result; ], + ref Mnil)) in type_in_format fmt |