summaryrefslogtreecommitdiffstats
path: root/camlp4/ocaml_src/lib
diff options
context:
space:
mode:
Diffstat (limited to 'camlp4/ocaml_src/lib')
-rw-r--r--camlp4/ocaml_src/lib/Makefile14
-rw-r--r--camlp4/ocaml_src/lib/grammar.ml80
-rw-r--r--camlp4/ocaml_src/lib/grammar.mli17
-rw-r--r--camlp4/ocaml_src/lib/plexer.ml521
-rw-r--r--camlp4/ocaml_src/lib/plexer.mli6
-rw-r--r--camlp4/ocaml_src/lib/stdpp.ml78
-rw-r--r--camlp4/ocaml_src/lib/stdpp.mli8
-rw-r--r--camlp4/ocaml_src/lib/token.ml3
-rw-r--r--camlp4/ocaml_src/lib/token.mli7
9 files changed, 609 insertions, 125 deletions
diff --git a/camlp4/ocaml_src/lib/Makefile b/camlp4/ocaml_src/lib/Makefile
index 524831704..d587e7446 100644
--- a/camlp4/ocaml_src/lib/Makefile
+++ b/camlp4/ocaml_src/lib/Makefile
@@ -36,17 +36,17 @@ compare:
done
install:
- -$(MKDIR) $(LIBDIR)/camlp4
- cp $(TARGET) *.mli $(LIBDIR)/camlp4/.
- cp *.cmi $(LIBDIR)/camlp4/.
- if test -f $(TARGET:.cma=.cmxa); then $(MAKE) installopt LIBDIR=$(LIBDIR); fi
+ -$(MKDIR) "$(LIBDIR)/camlp4"
+ cp $(TARGET) *.mli "$(LIBDIR)/camlp4/."
+ cp *.cmi "$(LIBDIR)/camlp4/."
+ if test -f $(TARGET:.cma=.cmxa); then $(MAKE) installopt LIBDIR="$(LIBDIR)"; fi
installopt:
- cp $(TARGET:.cma=.cmxa) *.cmx $(LIBDIR)/camlp4/.
+ cp $(TARGET:.cma=.cmxa) *.cmx "$(LIBDIR)/camlp4/."
if test -f $(TARGET:.cma=.lib); then \
- cp $(TARGET:.cma=.lib) $(LIBDIR)/camlp4/.; \
+ cp $(TARGET:.cma=.lib) "$(LIBDIR)/camlp4/."; \
else \
- tar cf - $(TARGET:.cma=.a) | (cd $(LIBDIR)/camlp4/.; tar xf -); \
+ tar cf - $(TARGET:.cma=.a) | (cd "$(LIBDIR)/camlp4/."; tar xf -); \
fi
include .depend
diff --git a/camlp4/ocaml_src/lib/grammar.ml b/camlp4/ocaml_src/lib/grammar.ml
index 66e7d3580..196a6b954 100644
--- a/camlp4/ocaml_src/lib/grammar.ml
+++ b/camlp4/ocaml_src/lib/grammar.ml
@@ -120,7 +120,71 @@ let print_entry ppf e =
Dlevels elev -> print_levels ppf elev
| Dparser _ -> fprintf ppf "<parser>"
end;
- fprintf ppf " ]@]@."
+ fprintf ppf " ]@]"
+;;
+
+let iter_entry f e =
+ let treated = ref [] in
+ let rec do_entry e =
+ if List.memq e !treated then ()
+ else
+ begin
+ treated := e :: !treated;
+ f e;
+ match e.edesc with
+ Dlevels ll -> List.iter do_level ll
+ | Dparser _ -> ()
+ end
+ and do_level lev = do_tree lev.lsuffix; do_tree lev.lprefix
+ and do_tree =
+ function
+ Node n -> do_node n
+ | LocAct (_, _) | DeadEnd -> ()
+ and do_node n = do_symbol n.node; do_tree n.son; do_tree n.brother
+ and do_symbol =
+ function
+ Smeta (_, sl, _) -> List.iter do_symbol sl
+ | Snterm e | Snterml (e, _) -> do_entry e
+ | Slist0 s | Slist1 s | Sopt s -> do_symbol s
+ | Slist0sep (s1, s2) | Slist1sep (s1, s2) -> do_symbol s1; do_symbol s2
+ | Stree t -> do_tree t
+ | Sself | Snext | Stoken _ -> ()
+ in
+ do_entry e
+;;
+
+let fold_entry f e init =
+ let treated = ref [] in
+ let rec do_entry accu e =
+ if List.memq e !treated then accu
+ else
+ begin
+ treated := e :: !treated;
+ let accu = f e accu in
+ match e.edesc with
+ Dlevels ll -> List.fold_left do_level accu ll
+ | Dparser _ -> accu
+ end
+ and do_level accu lev =
+ let accu = do_tree accu lev.lsuffix in do_tree accu lev.lprefix
+ and do_tree accu =
+ function
+ Node n -> do_node accu n
+ | LocAct (_, _) | DeadEnd -> accu
+ and do_node accu n =
+ let accu = do_symbol accu n.node in
+ let accu = do_tree accu n.son in do_tree accu n.brother
+ and do_symbol accu =
+ function
+ Smeta (_, sl, _) -> List.fold_left do_symbol accu sl
+ | Snterm e | Snterml (e, _) -> do_entry accu e
+ | Slist0 s | Slist1 s | Sopt s -> do_symbol accu s
+ | Slist0sep (s1, s2) | Slist1sep (s1, s2) ->
+ let accu = do_symbol accu s1 in do_symbol accu s2
+ | Stree t -> do_tree accu t
+ | Sself | Snext | Stoken _ -> accu
+ in
+ do_entry init e
;;
type g = Token.t Gramext.grammar;;
@@ -774,7 +838,7 @@ let glexer_of_lexer lexer =
{Token.tok_func = lexer.Token.func; Token.tok_using = lexer.Token.using;
Token.tok_removing = lexer.Token.removing;
Token.tok_match = tematch lexer.Token.tparse;
- Token.tok_text = lexer.Token.text}
+ Token.tok_text = lexer.Token.text; Token.tok_comm = None}
;;
let create lexer = gcreate (glexer_of_lexer lexer);;
@@ -933,12 +997,12 @@ module Entry =
edesc = Dparser (Obj.magic p)}
;;
external obj : 'a e -> te Gramext.g_entry = "%identity";;
- let print e = print_entry std_formatter (obj e);;
+ let print e = printf "%a@." print_entry (obj e);;
let find e s = find_entry (obj e) s;;
end
;;
-let gen_tokens g con =
+let tokens g con =
let list = ref [] in
Hashtbl.iter
(fun (p_con, p_prm) c -> if p_con = con then list := (p_prm, !c) :: !list)
@@ -946,7 +1010,7 @@ let gen_tokens g con =
!list
;;
-let tokens g = gen_tokens (grammar_obj g);;
+let glexer g = g.glexer;;
let warning_verbose = Gramext.warning_verbose;;
@@ -960,6 +1024,7 @@ module type S =
type parsable;;
val parsable : char Stream.t -> parsable;;
val tokens : string -> (string * int) list;;
+ val glexer : te Token.glexer;;
module Entry :
sig
type 'a e;;
@@ -998,7 +1063,8 @@ module GGMake (R : ReinitType) (L : GLexerType) =
type parsable = char Stream.t * (te Stream.t * Token.location_function);;
let gram = gcreate L.lexer;;
let parsable cs = cs, L.lexer.Token.tok_func cs;;
- let tokens = gen_tokens gram;;
+ let tokens = tokens gram;;
+ let glexer = glexer gram;;
module Entry =
struct
type 'a e = te g_entry;;
@@ -1020,7 +1086,7 @@ module GGMake (R : ReinitType) (L : GLexerType) =
(fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure);
edesc = Dparser (Obj.magic p)}
;;
- let print e = print_entry std_formatter (obj e);;
+ let print e = printf "%a@." print_entry (obj e);;
end
;;
module Unsafe =
diff --git a/camlp4/ocaml_src/lib/grammar.mli b/camlp4/ocaml_src/lib/grammar.mli
index c29e8f8a7..d38b449f9 100644
--- a/camlp4/ocaml_src/lib/grammar.mli
+++ b/camlp4/ocaml_src/lib/grammar.mli
@@ -35,6 +35,8 @@ val tokens : g -> string -> (string * int) list;;
list.
- The call [Grammar.token g "IDENT"] returns the list of all usages
of the pattern "IDENT" in the [EXTEND] statements. *)
+val glexer : g -> Token.t Token.glexer;;
+ (** Return the lexer used by the grammar *)
module Entry :
sig
@@ -100,6 +102,7 @@ module type S =
type parsable;;
val parsable : char Stream.t -> parsable;;
val tokens : string -> (string * int) list;;
+ val glexer : te Token.glexer;;
module Entry :
sig
type 'a e;;
@@ -156,6 +159,20 @@ val strict_parsing : bool ref;;
val print_entry : Format.formatter -> 'te Gramext.g_entry -> unit;;
(** General printer for all kinds of entries (obj entries) *)
+val iter_entry :
+ ('te Gramext.g_entry -> unit) -> 'te Gramext.g_entry -> unit;;
+ (** [Grammar.iter_entry f e] applies [f] to the entry [e] and
+ transitively all entries called by [e]. The order in which
+ the entries are passed to [f] is the order they appear in
+ each entry. Each entry is passed only once. *)
+
+val fold_entry :
+ ('te Gramext.g_entry -> 'a -> 'a) -> 'te Gramext.g_entry -> 'a -> 'a;;
+ (** [Grammar.fold_entry f e init] computes [(f eN .. (f e2 (f e1 init)))],
+ where [e1 .. eN] are [e] and transitively all entries called by [e].
+ The order in which the entries are passed to [f] is the order they
+ appear in each entry. Each entry is passed only once. *)
+
(**/**)
(*** deprecated since version 3.05; use rather the functor GMake *)
diff --git a/camlp4/ocaml_src/lib/plexer.ml b/camlp4/ocaml_src/lib/plexer.ml
index 718e17b4f..20471597e 100644
--- a/camlp4/ocaml_src/lib/plexer.ml
+++ b/camlp4/ocaml_src/lib/plexer.ml
@@ -36,6 +36,16 @@ let get_buff len = String.sub !buff 0 len;;
(* The lexer *)
+let stream_peek_nth n strm =
+ let rec loop n =
+ function
+ [] -> None
+ | [x] -> if n == 1 then Some x else None
+ | _ :: l -> loop (n - 1) l
+ in
+ loop n (Stream.npeek n strm)
+;;
+
let rec ident len (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some
@@ -132,21 +142,359 @@ and end_exponent_part_under len (strm__ : _ Stream.t) =
let error_on_unknown_keywords = ref false;;
let err loc msg = raise_with_loc loc (Token.Error msg);;
-let next_token_fun dfa find_kwd =
+(*
+value next_token_fun dfa find_kwd =
+ let keyword_or_error loc s =
+ try (("", find_kwd s), loc) with
+ [ Not_found ->
+ if error_on_unknown_keywords.val then err loc ("illegal token: " ^ s)
+ else (("", s), loc) ]
+ in
+ let rec next_token =
+ parser bp
+ [ [: `' ' | '\010' | '\013' | '\t' | '\026' | '\012'; s :] ->
+ next_token s
+ | [: `'('; s :] -> left_paren bp s
+ | [: `'#'; s :] -> do { spaces_tabs s; linenum bp s }
+ | [: `('A'..'Z' | '\192'..'\214' | '\216'..'\222' as c); s :] ->
+ let id = get_buff (ident (store 0 c) s) in
+ let loc = (bp, Stream.count s) in
+ (try ("", find_kwd id) with [ Not_found -> ("UIDENT", id) ], loc)
+ | [: `('a'..'z' | '\223'..'\246' | '\248'..'\255' | '_' as c); s :] ->
+ let id = get_buff (ident (store 0 c) s) in
+ let loc = (bp, Stream.count s) in
+ (try ("", find_kwd id) with [ Not_found -> ("LIDENT", id) ], loc)
+ | [: `('1'..'9' as c); s :] ->
+ let tok = number (store 0 c) s in
+ let loc = (bp, Stream.count s) in
+ (tok, loc)
+ | [: `'0'; s :] ->
+ let tok = base_number (store 0 '0') s in
+ let loc = (bp, Stream.count s) in
+ (tok, loc)
+ | [: `'''; s :] ->
+ match Stream.npeek 3 s with
+ [ [_; '''; _] | ['\\'; _; _] | ['\x0D'; '\x0A'; '''] ->
+ let tok = ("CHAR", get_buff (char bp 0 s)) in
+ let loc = (bp, Stream.count s) in
+ (tok, loc)
+ | _ -> keyword_or_error (bp, Stream.count s) "'" ]
+ | [: `'"'; s :] ->
+ let tok = ("STRING", get_buff (string bp 0 s)) in
+ let loc = (bp, Stream.count s) in
+ (tok, loc)
+ | [: `'$'; s :] ->
+ let tok = dollar bp 0 s in
+ let loc = (bp, Stream.count s) in
+ (tok, loc)
+ | [: `('!' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' | '%' as c);
+ s :] ->
+ let id = get_buff (ident2 (store 0 c) s) in
+ keyword_or_error (bp, Stream.count s) id
+ | [: `('~' as c);
+ a =
+ parser
+ [ [: `('a'..'z' as c); len = ident (store 0 c) :] ep ->
+ (("TILDEIDENT", get_buff len), (bp, ep))
+ | [: s :] ->
+ let id = get_buff (ident2 (store 0 c) s) in
+ keyword_or_error (bp, Stream.count s) id ] :] ->
+ a
+ | [: `('?' as c);
+ a =
+ parser
+ [ [: `('a'..'z' as c); len = ident (store 0 c) :] ep ->
+ (("QUESTIONIDENT", get_buff len), (bp, ep))
+ | [: s :] ->
+ let id = get_buff (ident2 (store 0 c) s) in
+ keyword_or_error (bp, Stream.count s) id ] :] ->
+ a
+ | [: `'<'; s :] -> less bp s
+ | [: `(':' as c1);
+ len =
+ parser
+ [ [: `(']' | ':' | '=' | '>' as c2) :] -> store (store 0 c1) c2
+ | [: :] -> store 0 c1 ] :] ep ->
+ let id = get_buff len in
+ keyword_or_error (bp, ep) id
+ | [: `('>' | '|' as c1);
+ len =
+ parser
+ [ [: `(']' | '}' as c2) :] -> store (store 0 c1) c2
+ | [: a = ident2 (store 0 c1) :] -> a ] :] ep ->
+ let id = get_buff len in
+ keyword_or_error (bp, ep) id
+ | [: `('[' | '{' as c1); s :] ->
+ let len =
+ match Stream.npeek 2 s with
+ [ ['<'; '<' | ':'] -> store 0 c1
+ | _ ->
+ match s with parser
+ [ [: `('|' | '<' | ':' as c2) :] -> store (store 0 c1) c2
+ | [: :] -> store 0 c1 ] ]
+ in
+ let ep = Stream.count s in
+ let id = get_buff len in
+ keyword_or_error (bp, ep) id
+ | [: `'.';
+ id =
+ parser
+ [ [: `'.' :] -> ".."
+ | [: :] -> if ssd && after_space then " ." else "." ] :] ep ->
+ keyword_or_error (bp, ep) id
+ | [: `';';
+ id =
+ parser
+ [ [: `';' :] -> ";;"
+ | [: :] -> ";" ] :] ep ->
+ keyword_or_error (bp, ep) id
+ | [: `'\\'; s :] ep -> (("LIDENT", get_buff (ident3 0 s)), (bp, ep))
+ | [: `c :] ep -> keyword_or_error (bp, ep) (String.make 1 c)
+ | [: _ = Stream.empty :] -> (("EOI", ""), (bp, succ bp)) ]
+ and less bp strm =
+ if no_quotations.val then
+ match strm with parser
+ [ [: len = ident2 (store 0 '<') :] ep ->
+ let id = get_buff len in
+ keyword_or_error (bp, ep) id ]
+ else
+ match strm with parser
+ [ [: `'<'; len = quotation bp 0 :] ep ->
+ (("QUOTATION", ":" ^ get_buff len), (bp, ep))
+ | [: `':'; i = parser [: len = ident 0 :] -> get_buff len;
+ `'<' ? "character '<' expected"; len = quotation bp 0 :] ep ->
+ (("QUOTATION", i ^ ":" ^ get_buff len), (bp, ep))
+ | [: len = ident2 (store 0 '<') :] ep ->
+ let id = get_buff len in
+ keyword_or_error (bp, ep) id ]
+ and string bp len =
+ parser
+ [ [: `'"' :] -> len
+ | [: `'\\'; `c; s :] -> string bp (store (store len '\\') c) s
+ | [: `c; s :] -> string bp (store len c) s
+ | [: :] ep -> err (bp, ep) "string not terminated" ]
+ and char bp len =
+ parser
+ [ [: `'''; s :] -> if len = 0 then char bp (store len ''') s else len
+ | [: `'\\'; `c; s :] -> char bp (store (store len '\\') c) s
+ | [: `c; s :] -> char bp (store len c) s
+ | [: :] ep -> err (bp, ep) "char not terminated" ]
+ and dollar bp len =
+ parser
+ [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len)
+ | [: `('a'..'z' | 'A'..'Z' as c); s :] -> antiquot bp (store len c) s
+ | [: `('0'..'9' as c); s :] -> maybe_locate bp (store len c) s
+ | [: `':'; s :] ->
+ let k = get_buff len in
+ ("ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s)
+ | [: `'\\'; `c; s :] ->
+ ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s)
+ | [: s :] ->
+ if dfa then
+ match s with parser
+ [ [: `c :] ->
+ ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s)
+ | [: :] ep -> err (bp, ep) "antiquotation not terminated" ]
+ else ("", get_buff (ident2 (store 0 '$') s)) ]
+ and maybe_locate bp len =
+ parser
+ [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len)
+ | [: `('0'..'9' as c); s :] -> maybe_locate bp (store len c) s
+ | [: `':'; s :] ->
+ ("LOCATE", get_buff len ^ ":" ^ locate_or_antiquot_rest bp 0 s)
+ | [: `'\\'; `c; s :] ->
+ ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s)
+ | [: `c; s :] ->
+ ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s)
+ | [: :] ep -> err (bp, ep) "antiquotation not terminated" ]
+ and antiquot bp len =
+ parser
+ [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len)
+ | [: `('a'..'z' | 'A'..'Z' | '0'..'9' as c); s :] ->
+ antiquot bp (store len c) s
+ | [: `':'; s :] ->
+ let k = get_buff len in
+ ("ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s)
+ | [: `'\\'; `c; s :] ->
+ ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s)
+ | [: `c; s :] ->
+ ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s)
+ | [: :] ep -> err (bp, ep) "antiquotation not terminated" ]
+ and locate_or_antiquot_rest bp len =
+ parser
+ [ [: `'$' :] -> get_buff len
+ | [: `'\\'; `c; s :] -> locate_or_antiquot_rest bp (store len c) s
+ | [: `c; s :] -> locate_or_antiquot_rest bp (store len c) s
+ | [: :] ep -> err (bp, ep) "antiquotation not terminated" ]
+ and quotation bp len =
+ parser
+ [ [: `'>'; s :] -> maybe_end_quotation bp len s
+ | [: `'<'; s :] ->
+ quotation bp (maybe_nested_quotation bp (store len '<') s) s
+ | [: `'\\';
+ len =
+ parser
+ [ [: `('>' | '<' | '\\' as c) :] -> store len c
+ | [: :] -> store len '\\' ];
+ s :] ->
+ quotation bp len s
+ | [: `c; s :] -> quotation bp (store len c) s
+ | [: :] ep -> err (bp, ep) "quotation not terminated" ]
+ and maybe_nested_quotation bp len =
+ parser
+ [ [: `'<'; s :] -> mstore (quotation bp (store len '<') s) ">>"
+ | [: `':'; len = ident (store len ':');
+ a =
+ parser
+ [ [: `'<'; s :] -> mstore (quotation bp (store len '<') s) ">>"
+ | [: :] -> len ] :] ->
+ a
+ | [: :] -> len ]
+ and maybe_end_quotation bp len =
+ parser
+ [ [: `'>' :] -> len
+ | [: a = quotation bp (store len '>') :] -> a ]
+ and left_paren bp =
+ parser
+ [ [: `'*'; _ = comment bp; a = next_token True :] -> a
+ | [: :] ep -> keyword_or_error (bp, ep) "(" ]
+ and comment bp =
+ parser
+ [ [: `'('; s :] -> left_paren_in_comment bp s
+ | [: `'*'; s :] -> star_in_comment bp s
+ | [: `'"'; _ = string bp 0; s :] -> comment bp s
+ | [: `'''; s :] -> quote_in_comment bp s
+ | [: `c; s :] -> comment bp s
+ | [: :] ep -> err (bp, ep) "comment not terminated" ]
+ and quote_in_comment bp =
+ parser
+ [ [: `'''; s :] -> comment bp s
+ | [: `'\013'; s :] -> quote_cr_in_comment bp s
+ | [: `'\\'; s :] -> quote_antislash_in_comment bp s
+ | [: `'('; s :] -> quote_left_paren_in_comment bp s
+ | [: `'*'; s :] -> quote_star_in_comment bp s
+ | [: `'"'; s :] -> quote_doublequote_in_comment bp s
+ | [: `_; s :] -> quote_any_in_comment bp s
+ | [: s :] -> comment bp s ]
+ and quote_any_in_comment bp =
+ parser
+ [ [: `'''; s :] -> comment bp s
+ | [: s :] -> comment bp s ]
+ and quote_cr_in_comment bp =
+ parser
+ [ [: `'\010'; s :] -> quote_any_in_comment bp s
+ | [: s :] -> quote_any_in_comment bp s ]
+ and quote_left_paren_in_comment bp =
+ parser
+ [ [: `'''; s :] -> comment bp s
+ | [: s :] -> left_paren_in_comment bp s ]
+ and quote_star_in_comment bp =
+ parser
+ [ [: `'''; s :] -> comment bp s
+ | [: s :] -> star_in_comment bp s ]
+ and quote_doublequote_in_comment bp =
+ parser
+ [ [: `'''; s :] -> comment bp s
+ | [: _ = string bp 0; s :] -> comment bp s ]
+ and quote_antislash_in_comment bp =
+ parser
+ [ [: `'''; s :] -> quote_antislash_quote_in_comment bp s
+ | [: `('\\' | '"' | 'n' | 't' | 'b' | 'r'); s :] ->
+ quote_any_in_comment bp s
+ | [: `('0'..'9'); s :] -> quote_antislash_digit_in_comment bp s
+ | [: `'x'; s :] -> quote_antislash_x_in_comment bp s
+ | [: s :] -> comment bp s ]
+ and quote_antislash_quote_in_comment bp =
+ parser
+ [ [: `'''; s :] -> comment bp s
+ | [: s :] -> quote_in_comment bp s ]
+ and quote_antislash_digit_in_comment bp =
+ parser
+ [ [: `('0'..'9'); s :] -> quote_antislash_digit2_in_comment bp s
+ | [: s :] -> comment bp s ]
+ and quote_antislash_digit2_in_comment bp =
+ parser
+ [ [: `('0'..'9'); s :] -> quote_any_in_comment bp s
+ | [: s :] -> comment bp s ]
+ and quote_antislash_x_in_comment bp =
+ parser
+ [ [: _ = hexa; s :] -> quote_antislash_x_digit_in_comment bp s
+ | [: s :] -> comment bp s ]
+ and quote_antislash_x_digit_in_comment bp =
+ parser
+ [ [: _ = hexa; s :] -> quote_any_in_comment bp s
+ | [: s :] -> comment bp s ]
+ and left_paren_in_comment bp =
+ parser
+ [ [: `'*'; s :] -> do { comment bp s; comment bp s }
+ | [: a = comment bp :] -> a ]
+ and star_in_comment bp =
+ parser
+ [ [: `')' :] -> ()
+ | [: a = comment bp :] -> a ]
+ and linedir n s =
+ match stream_peek_nth n s with
+ [ Some (' ' | '\t') -> linedir (n + 1) s
+ | Some ('0'..'9') -> linedir_digits (n + 1) s
+ | _ -> False ]
+ and linedir_digits n s =
+ match stream_peek_nth n s with
+ [ Some ('0'..'9') -> linedir_digits (n + 1) s
+ | _ -> linedir_quote n s ]
+ and linedir_quote n s =
+ match stream_peek_nth n s with
+ [ Some (' ' | '\t') -> linedir_quote (n + 1) s
+ | Some '"' -> True
+ | _ -> False ]
+ and any_to_nl =
+ parser
+ [ [: `'\013' | '\010' :] ep -> bolpos.val := ep
+ | [: `_; s :] -> any_to_nl s
+ | [: :] -> () ]
+ in
+ fun cstrm ->
+ try
+ let glex = glexr.val in
+ let comm_bp = Stream.count cstrm in
+ let r = next_token False cstrm in
+ do {
+ match glex.tok_comm with
+ [ Some list ->
+ if fst (snd r) > comm_bp then
+ let comm_loc = (comm_bp, fst (snd r)) in
+ glex.tok_comm := Some [comm_loc :: list]
+ else ()
+ | None -> () ];
+ r
+ }
+ with
+ [ Stream.Error str ->
+ err (Stream.count cstrm, Stream.count cstrm + 1) str ]
+;
+*)
+
+let next_token_fun dfa ssd find_kwd bolpos glexr =
let keyword_or_error loc s =
try ("", find_kwd s), loc with
Not_found ->
if !error_on_unknown_keywords then err loc ("illegal token: " ^ s)
else ("", s), loc
in
- let rec next_token (strm__ : _ Stream.t) =
+ let rec next_token after_space (strm__ : _ Stream.t) =
let bp = Stream.count strm__ in
match Stream.peek strm__ with
- Some (' ' | '\010' | '\013' | '\t' | '\026' | '\012') ->
- Stream.junk strm__; next_token strm__
+ Some ('\010' | '\013') ->
+ Stream.junk strm__;
+ let s = strm__ in
+ let ep = Stream.count strm__ in bolpos := ep; next_token true s
+ | Some (' ' | '\t' | '\026' | '\012') ->
+ Stream.junk strm__; next_token true strm__
+ | Some '#' when bp = !bolpos ->
+ Stream.junk strm__;
+ let s = strm__ in
+ if linedir 1 s then begin any_to_nl s; next_token true s end
+ else keyword_or_error (bp, bp + 1) "#"
| Some '(' -> Stream.junk strm__; left_paren bp strm__
- | Some '#' ->
- Stream.junk strm__; let s = strm__ in spaces_tabs s; linenum bp s
| Some ('A'..'Z' | '\192'..'\214' | '\216'..'\222' as c) ->
Stream.junk strm__;
let s = strm__ in
@@ -174,8 +522,8 @@ let next_token_fun dfa find_kwd =
| Some '\'' ->
Stream.junk strm__;
let s = strm__ in
- begin match Stream.npeek 3 s with
- [_; '\''; _] | ['\\'; _; _] | ['\x0D'; '\x0A'; '\''] ->
+ begin match Stream.npeek 2 s with
+ [_; '\''] | ['\\'; _] ->
let tok = "CHAR", get_buff (char bp 0 s) in
let loc = bp, Stream.count s in tok, loc
| _ -> keyword_or_error (bp, Stream.count s) "'"
@@ -276,7 +624,7 @@ let next_token_fun dfa find_kwd =
try
match Stream.peek strm__ with
Some '.' -> Stream.junk strm__; ".."
- | _ -> "."
+ | _ -> if ssd && after_space then " ." else "."
with
Stream.Failure -> raise (Stream.Error "")
in
@@ -511,7 +859,7 @@ let next_token_fun dfa find_kwd =
try comment bp strm__ with
Stream.Failure -> raise (Stream.Error "")
in
- begin try next_token strm__ with
+ begin try next_token true strm__ with
Stream.Failure -> raise (Stream.Error "")
end
| _ -> let ep = Stream.count strm__ in keyword_or_error (bp, ep) "("
@@ -533,47 +881,26 @@ let next_token_fun dfa find_kwd =
and quote_in_comment bp (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '\'' -> Stream.junk strm__; comment bp strm__
- | Some '\013' -> Stream.junk strm__; quote_cr_in_comment bp strm__
- | Some '\\' -> Stream.junk strm__; quote_antislash_in_comment bp strm__
- | Some '(' -> Stream.junk strm__; quote_left_paren_in_comment bp strm__
- | Some '*' -> Stream.junk strm__; quote_star_in_comment bp strm__
- | Some '\"' -> Stream.junk strm__; quote_doublequote_in_comment bp strm__
- | Some _ -> Stream.junk strm__; quote_any_in_comment bp strm__
- | _ -> comment bp strm__
+ | Some '\\' -> Stream.junk strm__; quote_antislash_in_comment bp 0 strm__
+ | _ ->
+ let s = strm__ in
+ begin match Stream.npeek 2 s with
+ [_; '\''] -> Stream.junk s; Stream.junk s
+ | _ -> ()
+ end;
+ comment bp s
and quote_any_in_comment bp (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '\'' -> Stream.junk strm__; comment bp strm__
| _ -> comment bp strm__
- and quote_cr_in_comment bp (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some '\010' -> Stream.junk strm__; quote_any_in_comment bp strm__
- | _ -> quote_any_in_comment bp strm__
- and quote_left_paren_in_comment bp (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some '\'' -> Stream.junk strm__; comment bp strm__
- | _ -> left_paren_in_comment bp strm__
- and quote_star_in_comment bp (strm__ : _ Stream.t) =
+ and quote_antislash_in_comment bp len (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '\'' -> Stream.junk strm__; comment bp strm__
- | _ -> star_in_comment bp strm__
- and quote_doublequote_in_comment bp (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some '\'' -> Stream.junk strm__; comment bp strm__
- | _ -> let _ = string bp 0 strm__ in comment bp strm__
- and quote_antislash_in_comment bp (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some '\'' ->
- Stream.junk strm__; quote_antislash_quote_in_comment bp strm__
| Some ('\\' | '\"' | 'n' | 't' | 'b' | 'r') ->
Stream.junk strm__; quote_any_in_comment bp strm__
| Some ('0'..'9') ->
Stream.junk strm__; quote_antislash_digit_in_comment bp strm__
- | Some 'x' -> Stream.junk strm__; quote_antislash_x_in_comment bp strm__
| _ -> comment bp strm__
- and quote_antislash_quote_in_comment bp (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some '\'' -> Stream.junk strm__; comment bp strm__
- | _ -> quote_in_comment bp strm__
and quote_antislash_digit_in_comment bp (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some ('0'..'9') ->
@@ -583,20 +910,6 @@ let next_token_fun dfa find_kwd =
match Stream.peek strm__ with
Some ('0'..'9') -> Stream.junk strm__; quote_any_in_comment bp strm__
| _ -> comment bp strm__
- and quote_antislash_x_in_comment bp (strm__ : _ Stream.t) =
- match
- try Some (hexa strm__) with
- Stream.Failure -> None
- with
- Some _ -> quote_antislash_x_digit_in_comment bp strm__
- | _ -> comment bp strm__
- and quote_antislash_x_digit_in_comment bp (strm__ : _ Stream.t) =
- match
- try Some (hexa strm__) with
- Stream.Failure -> None
- with
- Some _ -> quote_any_in_comment bp strm__
- | _ -> comment bp strm__
and left_paren_in_comment bp (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '*' ->
@@ -606,54 +919,54 @@ let next_token_fun dfa find_kwd =
match Stream.peek strm__ with
Some ')' -> Stream.junk strm__; ()
| _ -> comment bp strm__
- and linenum bp (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some ('0'..'9') ->
- Stream.junk strm__;
- let _ =
- try digits strm__ with
- Stream.Failure -> raise (Stream.Error "")
- in
- let _ =
- try spaces_tabs strm__ with
- Stream.Failure -> raise (Stream.Error "")
- in
- begin match Stream.peek strm__ with
- Some '\"' ->
- Stream.junk strm__;
- let _ =
- try any_to_nl strm__ with
- Stream.Failure -> raise (Stream.Error "")
- in
- next_token strm__
- | _ -> raise (Stream.Error "")
- end
- | _ -> keyword_or_error (bp, bp + 1) "#"
- and spaces_tabs (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some (' ' | '\t') -> Stream.junk strm__; spaces_tabs strm__
- | _ -> ()
- and digits (strm__ : _ Stream.t) =
- match Stream.peek strm__ with
- Some ('0'..'9') -> Stream.junk strm__; digits strm__
- | _ -> ()
+ and linedir n s =
+ match stream_peek_nth n s with
+ Some (' ' | '\t') -> linedir (n + 1) s
+ | Some ('0'..'9') -> linedir_digits (n + 1) s
+ | _ -> false
+ and linedir_digits n s =
+ match stream_peek_nth n s with
+ Some ('0'..'9') -> linedir_digits (n + 1) s
+ | _ -> linedir_quote n s
+ and linedir_quote n s =
+ match stream_peek_nth n s with
+ Some (' ' | '\t') -> linedir_quote (n + 1) s
+ | Some '\"' -> true
+ | _ -> false
and any_to_nl (strm__ : _ Stream.t) =
match Stream.peek strm__ with
- Some ('\013' | '\010') -> Stream.junk strm__; ()
+ Some ('\013' | '\010') ->
+ Stream.junk strm__; let ep = Stream.count strm__ in bolpos := ep
| Some _ -> Stream.junk strm__; any_to_nl strm__
| _ -> ()
in
fun cstrm ->
- try next_token cstrm with
+ try
+ let glex = !glexr in
+ let comm_bp = Stream.count cstrm in
+ let r = next_token false cstrm in
+ begin match glex.tok_comm with
+ Some list ->
+ if fst (snd r) > comm_bp then
+ let comm_loc = comm_bp, fst (snd r) in
+ glex.tok_comm <- Some (comm_loc :: list)
+ | None -> ()
+ end;
+ r
+ with
Stream.Error str -> err (Stream.count cstrm, Stream.count cstrm + 1) str
;;
+
let dollar_for_antiquotation = ref true;;
+let specific_space_dot = ref false;;
-let func kwd_table =
+let func kwd_table glexr =
+ let bolpos = ref 0 in
let find = Hashtbl.find kwd_table in
let dfa = !dollar_for_antiquotation in
- Token.lexer_func_of_parser (next_token_fun dfa find)
+ let ssd = !specific_space_dot in
+ Token.lexer_func_of_parser (next_token_fun dfa ssd find bolpos glexr)
;;
let rec check_keyword_stream (strm__ : _ Stream.t) =
@@ -862,9 +1175,22 @@ let tok_match =
let gmake () =
let kwd_table = Hashtbl.create 301 in
let id_table = Hashtbl.create 301 in
- {tok_func = func kwd_table; tok_using = using_token kwd_table id_table;
- tok_removing = removing_token kwd_table id_table; tok_match = tok_match;
- tok_text = text}
+ let glexr =
+ ref
+ {tok_func = (fun _ -> raise (Match_failure ("plexer.ml", 952, 17)));
+ tok_using = (fun _ -> raise (Match_failure ("plexer.ml", 952, 37)));
+ tok_removing = (fun _ -> raise (Match_failure ("plexer.ml", 952, 60)));
+ tok_match = (fun _ -> raise (Match_failure ("plexer.ml", 953, 18)));
+ tok_text = (fun _ -> raise (Match_failure ("plexer.ml", 953, 37)));
+ tok_comm = None}
+ in
+ let glex =
+ {tok_func = func kwd_table glexr;
+ tok_using = using_token kwd_table id_table;
+ tok_removing = removing_token kwd_table id_table; tok_match = tok_match;
+ tok_text = text; tok_comm = None}
+ in
+ glexr := glex; glex
;;
let tparse =
@@ -883,6 +1209,15 @@ let tparse =
let make () =
let kwd_table = Hashtbl.create 301 in
let id_table = Hashtbl.create 301 in
- {func = func kwd_table; using = using_token kwd_table id_table;
+ let glexr =
+ ref
+ {tok_func = (fun _ -> raise (Match_failure ("plexer.ml", 981, 17)));
+ tok_using = (fun _ -> raise (Match_failure ("plexer.ml", 981, 37)));
+ tok_removing = (fun _ -> raise (Match_failure ("plexer.ml", 981, 60)));
+ tok_match = (fun _ -> raise (Match_failure ("plexer.ml", 982, 18)));
+ tok_text = (fun _ -> raise (Match_failure ("plexer.ml", 982, 37)));
+ tok_comm = None}
+ in
+ {func = func kwd_table glexr; using = using_token kwd_table id_table;
removing = removing_token kwd_table id_table; tparse = tparse; text = text}
;;
diff --git a/camlp4/ocaml_src/lib/plexer.mli b/camlp4/ocaml_src/lib/plexer.mli
index d682f83ae..19bc0ce1b 100644
--- a/camlp4/ocaml_src/lib/plexer.mli
+++ b/camlp4/ocaml_src/lib/plexer.mli
@@ -54,6 +54,12 @@ val dollar_for_antiquotation : bool ref;;
lexer where the dollar sign is used for antiquotations. If False,
the dollar sign can be used as token. *)
+val specific_space_dot : bool ref;;
+ (** When False (default), the next call to [Plexer.make ()] returns a
+ lexer where the dots can be preceded by spaces. If True, dots
+ preceded by spaces return the keyword " ." (space dot), otherwise
+ return the keyword "." (dot). *)
+
val no_quotations : bool ref;;
(** When True, all lexers built by [Plexer.make ()] do not lex the
quotation syntax any more. Default is False (quotations are
diff --git a/camlp4/ocaml_src/lib/stdpp.ml b/camlp4/ocaml_src/lib/stdpp.ml
index 0830e6842..d91ee78c0 100644
--- a/camlp4/ocaml_src/lib/stdpp.ml
+++ b/camlp4/ocaml_src/lib/stdpp.ml
@@ -23,23 +23,77 @@ let raise_with_loc loc exc =
let line_of_loc fname (bp, ep) =
try
let ic = open_in_bin fname in
- let rec loop lin col cnt =
- if cnt < bp then
- let (lin, col) =
- match input_char ic with
- '\n' -> lin + 1, 0
- | _ -> lin, col + 1
- in
- loop lin col (cnt + 1)
- else lin, col, col + ep - bp
+ let strm = Stream.of_channel ic in
+ let rec loop fname lin =
+ let rec not_a_line_dir col (strm__ : _ Stream.t) =
+ let cnt = Stream.count strm__ in
+ match Stream.peek strm__ with
+ Some c ->
+ Stream.junk strm__;
+ let s = strm__ in
+ if cnt < bp then
+ if c = '\n' then loop fname (lin + 1)
+ else not_a_line_dir (col + 1) s
+ else let col = col - (cnt - bp) in fname, lin, col, col + ep - bp
+ | _ -> raise Stream.Failure
+ in
+ let rec a_line_dir str n col (strm__ : _ Stream.t) =
+ match Stream.peek strm__ with
+ Some '\n' -> Stream.junk strm__; loop str n
+ | Some _ -> Stream.junk strm__; a_line_dir str n (col + 1) strm__
+ | _ -> raise Stream.Failure
+ in
+ let rec spaces col (strm__ : _ Stream.t) =
+ match Stream.peek strm__ with
+ Some ' ' -> Stream.junk strm__; spaces (col + 1) strm__
+ | _ -> col
+ in
+ let rec check_string str n col (strm__ : _ Stream.t) =
+ match Stream.peek strm__ with
+ Some '\"' ->
+ Stream.junk strm__;
+ let col =
+ try spaces (col + 1) strm__ with
+ Stream.Failure -> raise (Stream.Error "")
+ in
+ a_line_dir str n col strm__
+ | Some c when c <> '\n' ->
+ Stream.junk strm__;
+ check_string (str ^ String.make 1 c) n (col + 1) strm__
+ | _ -> not_a_line_dir col strm__
+ in
+ let check_quote n col (strm__ : _ Stream.t) =
+ match Stream.peek strm__ with
+ Some '\"' -> Stream.junk strm__; check_string "" n (col + 1) strm__
+ | _ -> not_a_line_dir col strm__
+ in
+ let rec check_num n col (strm__ : _ Stream.t) =
+ match Stream.peek strm__ with
+ Some ('0'..'9' as c) ->
+ Stream.junk strm__;
+ check_num (10 * n + Char.code c - Char.code '0') (col + 1) strm__
+ | _ -> let col = spaces col strm__ in check_quote n col strm__
+ in
+ let begin_line (strm__ : _ Stream.t) =
+ match Stream.peek strm__ with
+ Some '#' ->
+ Stream.junk strm__;
+ let col =
+ try spaces 1 strm__ with
+ Stream.Failure -> raise (Stream.Error "")
+ in
+ check_num 0 col strm__
+ | _ -> not_a_line_dir 0 strm__
+ in
+ begin_line strm
in
let r =
- try loop 1 0 0 with
- End_of_file -> 1, bp, ep
+ try loop fname 1 with
+ Stream.Failure -> fname, 1, bp, ep
in
close_in ic; r
with
- Sys_error _ -> 1, bp, ep
+ Sys_error _ -> fname, 1, bp, ep
;;
let loc_name = ref "loc";;
diff --git a/camlp4/ocaml_src/lib/stdpp.mli b/camlp4/ocaml_src/lib/stdpp.mli
index 5afd1b2db..68c0cb6ad 100644
--- a/camlp4/ocaml_src/lib/stdpp.mli
+++ b/camlp4/ocaml_src/lib/stdpp.mli
@@ -25,10 +25,12 @@ val raise_with_loc : int * int -> exn -> 'a;;
(** [raise_with_loc loc e], if [e] is already the exception [Exc_located],
re-raise it, else raise the exception [Exc_located loc e]. *)
-val line_of_loc : string -> int * int -> int * int * int;;
+val line_of_loc : string -> int * int -> string * int * int * int;;
(** [line_of_loc fname loc] reads the file [fname] up to the
- location [loc] and returns the line number and the characters
- location in the line *)
+ location [loc] and returns the real input file, the line number
+ and the characters location in the line; the real input file
+ can be different from [fname] because of possibility of line
+ directives typically generated by /lib/cpp. *)
val loc_name : string ref;;
(** Name of the location variable used in grammars and in the predefined
diff --git a/camlp4/ocaml_src/lib/token.ml b/camlp4/ocaml_src/lib/token.ml
index 63f09bc19..67aaffdec 100644
--- a/camlp4/ocaml_src/lib/token.ml
+++ b/camlp4/ocaml_src/lib/token.ml
@@ -26,7 +26,8 @@ type 'te glexer =
tok_using : pattern -> unit;
tok_removing : pattern -> unit;
tok_match : pattern -> 'te -> string;
- tok_text : pattern -> string }
+ tok_text : pattern -> string;
+ mutable tok_comm : location list option }
;;
type lexer =
{ func : t lexer_func;
diff --git a/camlp4/ocaml_src/lib/token.mli b/camlp4/ocaml_src/lib/token.mli
index 4e14469d6..e561e28e8 100644
--- a/camlp4/ocaml_src/lib/token.mli
+++ b/camlp4/ocaml_src/lib/token.mli
@@ -47,7 +47,8 @@ type 'te glexer =
tok_using : pattern -> unit;
tok_removing : pattern -> unit;
tok_match : pattern -> 'te -> string;
- tok_text : pattern -> string }
+ tok_text : pattern -> string;
+ mutable tok_comm : location list option }
;;
(** The type for a lexer used by Camlp4 grammars.
- The field [tok_func] is the main lexer function. See [lexer_func]
@@ -66,7 +67,9 @@ type 'te glexer =
efficency, write it as a function returning functions according
to the values of the pattern, not a function with two parameters.
- The field [tok_text] returns the name of some token pattern,
- used in error messages. *)
+ used in error messages.
+- The field [tok_comm] if not None asks the lexer to record the
+ locations of the comments. *)
val lexer_text : pattern -> string;;
(** A simple [tok_text] function for lexers *)