diff options
Diffstat (limited to 'camlp4/ocaml_src/lib')
-rw-r--r-- | camlp4/ocaml_src/lib/Makefile | 14 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/grammar.ml | 80 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/grammar.mli | 17 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/plexer.ml | 521 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/plexer.mli | 6 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/stdpp.ml | 78 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/stdpp.mli | 8 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/token.ml | 3 | ||||
-rw-r--r-- | camlp4/ocaml_src/lib/token.mli | 7 |
9 files changed, 609 insertions, 125 deletions
diff --git a/camlp4/ocaml_src/lib/Makefile b/camlp4/ocaml_src/lib/Makefile index 524831704..d587e7446 100644 --- a/camlp4/ocaml_src/lib/Makefile +++ b/camlp4/ocaml_src/lib/Makefile @@ -36,17 +36,17 @@ compare: done install: - -$(MKDIR) $(LIBDIR)/camlp4 - cp $(TARGET) *.mli $(LIBDIR)/camlp4/. - cp *.cmi $(LIBDIR)/camlp4/. - if test -f $(TARGET:.cma=.cmxa); then $(MAKE) installopt LIBDIR=$(LIBDIR); fi + -$(MKDIR) "$(LIBDIR)/camlp4" + cp $(TARGET) *.mli "$(LIBDIR)/camlp4/." + cp *.cmi "$(LIBDIR)/camlp4/." + if test -f $(TARGET:.cma=.cmxa); then $(MAKE) installopt LIBDIR="$(LIBDIR)"; fi installopt: - cp $(TARGET:.cma=.cmxa) *.cmx $(LIBDIR)/camlp4/. + cp $(TARGET:.cma=.cmxa) *.cmx "$(LIBDIR)/camlp4/." if test -f $(TARGET:.cma=.lib); then \ - cp $(TARGET:.cma=.lib) $(LIBDIR)/camlp4/.; \ + cp $(TARGET:.cma=.lib) "$(LIBDIR)/camlp4/."; \ else \ - tar cf - $(TARGET:.cma=.a) | (cd $(LIBDIR)/camlp4/.; tar xf -); \ + tar cf - $(TARGET:.cma=.a) | (cd "$(LIBDIR)/camlp4/."; tar xf -); \ fi include .depend diff --git a/camlp4/ocaml_src/lib/grammar.ml b/camlp4/ocaml_src/lib/grammar.ml index 66e7d3580..196a6b954 100644 --- a/camlp4/ocaml_src/lib/grammar.ml +++ b/camlp4/ocaml_src/lib/grammar.ml @@ -120,7 +120,71 @@ let print_entry ppf e = Dlevels elev -> print_levels ppf elev | Dparser _ -> fprintf ppf "<parser>" end; - fprintf ppf " ]@]@." + fprintf ppf " ]@]" +;; + +let iter_entry f e = + let treated = ref [] in + let rec do_entry e = + if List.memq e !treated then () + else + begin + treated := e :: !treated; + f e; + match e.edesc with + Dlevels ll -> List.iter do_level ll + | Dparser _ -> () + end + and do_level lev = do_tree lev.lsuffix; do_tree lev.lprefix + and do_tree = + function + Node n -> do_node n + | LocAct (_, _) | DeadEnd -> () + and do_node n = do_symbol n.node; do_tree n.son; do_tree n.brother + and do_symbol = + function + Smeta (_, sl, _) -> List.iter do_symbol sl + | Snterm e | Snterml (e, _) -> do_entry e + | Slist0 s | Slist1 s | Sopt s -> do_symbol s + | Slist0sep (s1, s2) | Slist1sep (s1, s2) -> do_symbol s1; do_symbol s2 + | Stree t -> do_tree t + | Sself | Snext | Stoken _ -> () + in + do_entry e +;; + +let fold_entry f e init = + let treated = ref [] in + let rec do_entry accu e = + if List.memq e !treated then accu + else + begin + treated := e :: !treated; + let accu = f e accu in + match e.edesc with + Dlevels ll -> List.fold_left do_level accu ll + | Dparser _ -> accu + end + and do_level accu lev = + let accu = do_tree accu lev.lsuffix in do_tree accu lev.lprefix + and do_tree accu = + function + Node n -> do_node accu n + | LocAct (_, _) | DeadEnd -> accu + and do_node accu n = + let accu = do_symbol accu n.node in + let accu = do_tree accu n.son in do_tree accu n.brother + and do_symbol accu = + function + Smeta (_, sl, _) -> List.fold_left do_symbol accu sl + | Snterm e | Snterml (e, _) -> do_entry accu e + | Slist0 s | Slist1 s | Sopt s -> do_symbol accu s + | Slist0sep (s1, s2) | Slist1sep (s1, s2) -> + let accu = do_symbol accu s1 in do_symbol accu s2 + | Stree t -> do_tree accu t + | Sself | Snext | Stoken _ -> accu + in + do_entry init e ;; type g = Token.t Gramext.grammar;; @@ -774,7 +838,7 @@ let glexer_of_lexer lexer = {Token.tok_func = lexer.Token.func; Token.tok_using = lexer.Token.using; Token.tok_removing = lexer.Token.removing; Token.tok_match = tematch lexer.Token.tparse; - Token.tok_text = lexer.Token.text} + Token.tok_text = lexer.Token.text; Token.tok_comm = None} ;; let create lexer = gcreate (glexer_of_lexer lexer);; @@ -933,12 +997,12 @@ module Entry = edesc = Dparser (Obj.magic p)} ;; external obj : 'a e -> te Gramext.g_entry = "%identity";; - let print e = print_entry std_formatter (obj e);; + let print e = printf "%a@." print_entry (obj e);; let find e s = find_entry (obj e) s;; end ;; -let gen_tokens g con = +let tokens g con = let list = ref [] in Hashtbl.iter (fun (p_con, p_prm) c -> if p_con = con then list := (p_prm, !c) :: !list) @@ -946,7 +1010,7 @@ let gen_tokens g con = !list ;; -let tokens g = gen_tokens (grammar_obj g);; +let glexer g = g.glexer;; let warning_verbose = Gramext.warning_verbose;; @@ -960,6 +1024,7 @@ module type S = type parsable;; val parsable : char Stream.t -> parsable;; val tokens : string -> (string * int) list;; + val glexer : te Token.glexer;; module Entry : sig type 'a e;; @@ -998,7 +1063,8 @@ module GGMake (R : ReinitType) (L : GLexerType) = type parsable = char Stream.t * (te Stream.t * Token.location_function);; let gram = gcreate L.lexer;; let parsable cs = cs, L.lexer.Token.tok_func cs;; - let tokens = gen_tokens gram;; + let tokens = tokens gram;; + let glexer = glexer gram;; module Entry = struct type 'a e = te g_entry;; @@ -1020,7 +1086,7 @@ module GGMake (R : ReinitType) (L : GLexerType) = (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); edesc = Dparser (Obj.magic p)} ;; - let print e = print_entry std_formatter (obj e);; + let print e = printf "%a@." print_entry (obj e);; end ;; module Unsafe = diff --git a/camlp4/ocaml_src/lib/grammar.mli b/camlp4/ocaml_src/lib/grammar.mli index c29e8f8a7..d38b449f9 100644 --- a/camlp4/ocaml_src/lib/grammar.mli +++ b/camlp4/ocaml_src/lib/grammar.mli @@ -35,6 +35,8 @@ val tokens : g -> string -> (string * int) list;; list. - The call [Grammar.token g "IDENT"] returns the list of all usages of the pattern "IDENT" in the [EXTEND] statements. *) +val glexer : g -> Token.t Token.glexer;; + (** Return the lexer used by the grammar *) module Entry : sig @@ -100,6 +102,7 @@ module type S = type parsable;; val parsable : char Stream.t -> parsable;; val tokens : string -> (string * int) list;; + val glexer : te Token.glexer;; module Entry : sig type 'a e;; @@ -156,6 +159,20 @@ val strict_parsing : bool ref;; val print_entry : Format.formatter -> 'te Gramext.g_entry -> unit;; (** General printer for all kinds of entries (obj entries) *) +val iter_entry : + ('te Gramext.g_entry -> unit) -> 'te Gramext.g_entry -> unit;; + (** [Grammar.iter_entry f e] applies [f] to the entry [e] and + transitively all entries called by [e]. The order in which + the entries are passed to [f] is the order they appear in + each entry. Each entry is passed only once. *) + +val fold_entry : + ('te Gramext.g_entry -> 'a -> 'a) -> 'te Gramext.g_entry -> 'a -> 'a;; + (** [Grammar.fold_entry f e init] computes [(f eN .. (f e2 (f e1 init)))], + where [e1 .. eN] are [e] and transitively all entries called by [e]. + The order in which the entries are passed to [f] is the order they + appear in each entry. Each entry is passed only once. *) + (**/**) (*** deprecated since version 3.05; use rather the functor GMake *) diff --git a/camlp4/ocaml_src/lib/plexer.ml b/camlp4/ocaml_src/lib/plexer.ml index 718e17b4f..20471597e 100644 --- a/camlp4/ocaml_src/lib/plexer.ml +++ b/camlp4/ocaml_src/lib/plexer.ml @@ -36,6 +36,16 @@ let get_buff len = String.sub !buff 0 len;; (* The lexer *) +let stream_peek_nth n strm = + let rec loop n = + function + [] -> None + | [x] -> if n == 1 then Some x else None + | _ :: l -> loop (n - 1) l + in + loop n (Stream.npeek n strm) +;; + let rec ident len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some @@ -132,21 +142,359 @@ and end_exponent_part_under len (strm__ : _ Stream.t) = let error_on_unknown_keywords = ref false;; let err loc msg = raise_with_loc loc (Token.Error msg);; -let next_token_fun dfa find_kwd = +(* +value next_token_fun dfa find_kwd = + let keyword_or_error loc s = + try (("", find_kwd s), loc) with + [ Not_found -> + if error_on_unknown_keywords.val then err loc ("illegal token: " ^ s) + else (("", s), loc) ] + in + let rec next_token = + parser bp + [ [: `' ' | '\010' | '\013' | '\t' | '\026' | '\012'; s :] -> + next_token s + | [: `'('; s :] -> left_paren bp s + | [: `'#'; s :] -> do { spaces_tabs s; linenum bp s } + | [: `('A'..'Z' | '\192'..'\214' | '\216'..'\222' as c); s :] -> + let id = get_buff (ident (store 0 c) s) in + let loc = (bp, Stream.count s) in + (try ("", find_kwd id) with [ Not_found -> ("UIDENT", id) ], loc) + | [: `('a'..'z' | '\223'..'\246' | '\248'..'\255' | '_' as c); s :] -> + let id = get_buff (ident (store 0 c) s) in + let loc = (bp, Stream.count s) in + (try ("", find_kwd id) with [ Not_found -> ("LIDENT", id) ], loc) + | [: `('1'..'9' as c); s :] -> + let tok = number (store 0 c) s in + let loc = (bp, Stream.count s) in + (tok, loc) + | [: `'0'; s :] -> + let tok = base_number (store 0 '0') s in + let loc = (bp, Stream.count s) in + (tok, loc) + | [: `'''; s :] -> + match Stream.npeek 3 s with + [ [_; '''; _] | ['\\'; _; _] | ['\x0D'; '\x0A'; '''] -> + let tok = ("CHAR", get_buff (char bp 0 s)) in + let loc = (bp, Stream.count s) in + (tok, loc) + | _ -> keyword_or_error (bp, Stream.count s) "'" ] + | [: `'"'; s :] -> + let tok = ("STRING", get_buff (string bp 0 s)) in + let loc = (bp, Stream.count s) in + (tok, loc) + | [: `'$'; s :] -> + let tok = dollar bp 0 s in + let loc = (bp, Stream.count s) in + (tok, loc) + | [: `('!' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' | '%' as c); + s :] -> + let id = get_buff (ident2 (store 0 c) s) in + keyword_or_error (bp, Stream.count s) id + | [: `('~' as c); + a = + parser + [ [: `('a'..'z' as c); len = ident (store 0 c) :] ep -> + (("TILDEIDENT", get_buff len), (bp, ep)) + | [: s :] -> + let id = get_buff (ident2 (store 0 c) s) in + keyword_or_error (bp, Stream.count s) id ] :] -> + a + | [: `('?' as c); + a = + parser + [ [: `('a'..'z' as c); len = ident (store 0 c) :] ep -> + (("QUESTIONIDENT", get_buff len), (bp, ep)) + | [: s :] -> + let id = get_buff (ident2 (store 0 c) s) in + keyword_or_error (bp, Stream.count s) id ] :] -> + a + | [: `'<'; s :] -> less bp s + | [: `(':' as c1); + len = + parser + [ [: `(']' | ':' | '=' | '>' as c2) :] -> store (store 0 c1) c2 + | [: :] -> store 0 c1 ] :] ep -> + let id = get_buff len in + keyword_or_error (bp, ep) id + | [: `('>' | '|' as c1); + len = + parser + [ [: `(']' | '}' as c2) :] -> store (store 0 c1) c2 + | [: a = ident2 (store 0 c1) :] -> a ] :] ep -> + let id = get_buff len in + keyword_or_error (bp, ep) id + | [: `('[' | '{' as c1); s :] -> + let len = + match Stream.npeek 2 s with + [ ['<'; '<' | ':'] -> store 0 c1 + | _ -> + match s with parser + [ [: `('|' | '<' | ':' as c2) :] -> store (store 0 c1) c2 + | [: :] -> store 0 c1 ] ] + in + let ep = Stream.count s in + let id = get_buff len in + keyword_or_error (bp, ep) id + | [: `'.'; + id = + parser + [ [: `'.' :] -> ".." + | [: :] -> if ssd && after_space then " ." else "." ] :] ep -> + keyword_or_error (bp, ep) id + | [: `';'; + id = + parser + [ [: `';' :] -> ";;" + | [: :] -> ";" ] :] ep -> + keyword_or_error (bp, ep) id + | [: `'\\'; s :] ep -> (("LIDENT", get_buff (ident3 0 s)), (bp, ep)) + | [: `c :] ep -> keyword_or_error (bp, ep) (String.make 1 c) + | [: _ = Stream.empty :] -> (("EOI", ""), (bp, succ bp)) ] + and less bp strm = + if no_quotations.val then + match strm with parser + [ [: len = ident2 (store 0 '<') :] ep -> + let id = get_buff len in + keyword_or_error (bp, ep) id ] + else + match strm with parser + [ [: `'<'; len = quotation bp 0 :] ep -> + (("QUOTATION", ":" ^ get_buff len), (bp, ep)) + | [: `':'; i = parser [: len = ident 0 :] -> get_buff len; + `'<' ? "character '<' expected"; len = quotation bp 0 :] ep -> + (("QUOTATION", i ^ ":" ^ get_buff len), (bp, ep)) + | [: len = ident2 (store 0 '<') :] ep -> + let id = get_buff len in + keyword_or_error (bp, ep) id ] + and string bp len = + parser + [ [: `'"' :] -> len + | [: `'\\'; `c; s :] -> string bp (store (store len '\\') c) s + | [: `c; s :] -> string bp (store len c) s + | [: :] ep -> err (bp, ep) "string not terminated" ] + and char bp len = + parser + [ [: `'''; s :] -> if len = 0 then char bp (store len ''') s else len + | [: `'\\'; `c; s :] -> char bp (store (store len '\\') c) s + | [: `c; s :] -> char bp (store len c) s + | [: :] ep -> err (bp, ep) "char not terminated" ] + and dollar bp len = + parser + [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len) + | [: `('a'..'z' | 'A'..'Z' as c); s :] -> antiquot bp (store len c) s + | [: `('0'..'9' as c); s :] -> maybe_locate bp (store len c) s + | [: `':'; s :] -> + let k = get_buff len in + ("ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s) + | [: `'\\'; `c; s :] -> + ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) + | [: s :] -> + if dfa then + match s with parser + [ [: `c :] -> + ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) + | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] + else ("", get_buff (ident2 (store 0 '$') s)) ] + and maybe_locate bp len = + parser + [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len) + | [: `('0'..'9' as c); s :] -> maybe_locate bp (store len c) s + | [: `':'; s :] -> + ("LOCATE", get_buff len ^ ":" ^ locate_or_antiquot_rest bp 0 s) + | [: `'\\'; `c; s :] -> + ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) + | [: `c; s :] -> + ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) + | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] + and antiquot bp len = + parser + [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len) + | [: `('a'..'z' | 'A'..'Z' | '0'..'9' as c); s :] -> + antiquot bp (store len c) s + | [: `':'; s :] -> + let k = get_buff len in + ("ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s) + | [: `'\\'; `c; s :] -> + ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) + | [: `c; s :] -> + ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) + | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] + and locate_or_antiquot_rest bp len = + parser + [ [: `'$' :] -> get_buff len + | [: `'\\'; `c; s :] -> locate_or_antiquot_rest bp (store len c) s + | [: `c; s :] -> locate_or_antiquot_rest bp (store len c) s + | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] + and quotation bp len = + parser + [ [: `'>'; s :] -> maybe_end_quotation bp len s + | [: `'<'; s :] -> + quotation bp (maybe_nested_quotation bp (store len '<') s) s + | [: `'\\'; + len = + parser + [ [: `('>' | '<' | '\\' as c) :] -> store len c + | [: :] -> store len '\\' ]; + s :] -> + quotation bp len s + | [: `c; s :] -> quotation bp (store len c) s + | [: :] ep -> err (bp, ep) "quotation not terminated" ] + and maybe_nested_quotation bp len = + parser + [ [: `'<'; s :] -> mstore (quotation bp (store len '<') s) ">>" + | [: `':'; len = ident (store len ':'); + a = + parser + [ [: `'<'; s :] -> mstore (quotation bp (store len '<') s) ">>" + | [: :] -> len ] :] -> + a + | [: :] -> len ] + and maybe_end_quotation bp len = + parser + [ [: `'>' :] -> len + | [: a = quotation bp (store len '>') :] -> a ] + and left_paren bp = + parser + [ [: `'*'; _ = comment bp; a = next_token True :] -> a + | [: :] ep -> keyword_or_error (bp, ep) "(" ] + and comment bp = + parser + [ [: `'('; s :] -> left_paren_in_comment bp s + | [: `'*'; s :] -> star_in_comment bp s + | [: `'"'; _ = string bp 0; s :] -> comment bp s + | [: `'''; s :] -> quote_in_comment bp s + | [: `c; s :] -> comment bp s + | [: :] ep -> err (bp, ep) "comment not terminated" ] + and quote_in_comment bp = + parser + [ [: `'''; s :] -> comment bp s + | [: `'\013'; s :] -> quote_cr_in_comment bp s + | [: `'\\'; s :] -> quote_antislash_in_comment bp s + | [: `'('; s :] -> quote_left_paren_in_comment bp s + | [: `'*'; s :] -> quote_star_in_comment bp s + | [: `'"'; s :] -> quote_doublequote_in_comment bp s + | [: `_; s :] -> quote_any_in_comment bp s + | [: s :] -> comment bp s ] + and quote_any_in_comment bp = + parser + [ [: `'''; s :] -> comment bp s + | [: s :] -> comment bp s ] + and quote_cr_in_comment bp = + parser + [ [: `'\010'; s :] -> quote_any_in_comment bp s + | [: s :] -> quote_any_in_comment bp s ] + and quote_left_paren_in_comment bp = + parser + [ [: `'''; s :] -> comment bp s + | [: s :] -> left_paren_in_comment bp s ] + and quote_star_in_comment bp = + parser + [ [: `'''; s :] -> comment bp s + | [: s :] -> star_in_comment bp s ] + and quote_doublequote_in_comment bp = + parser + [ [: `'''; s :] -> comment bp s + | [: _ = string bp 0; s :] -> comment bp s ] + and quote_antislash_in_comment bp = + parser + [ [: `'''; s :] -> quote_antislash_quote_in_comment bp s + | [: `('\\' | '"' | 'n' | 't' | 'b' | 'r'); s :] -> + quote_any_in_comment bp s + | [: `('0'..'9'); s :] -> quote_antislash_digit_in_comment bp s + | [: `'x'; s :] -> quote_antislash_x_in_comment bp s + | [: s :] -> comment bp s ] + and quote_antislash_quote_in_comment bp = + parser + [ [: `'''; s :] -> comment bp s + | [: s :] -> quote_in_comment bp s ] + and quote_antislash_digit_in_comment bp = + parser + [ [: `('0'..'9'); s :] -> quote_antislash_digit2_in_comment bp s + | [: s :] -> comment bp s ] + and quote_antislash_digit2_in_comment bp = + parser + [ [: `('0'..'9'); s :] -> quote_any_in_comment bp s + | [: s :] -> comment bp s ] + and quote_antislash_x_in_comment bp = + parser + [ [: _ = hexa; s :] -> quote_antislash_x_digit_in_comment bp s + | [: s :] -> comment bp s ] + and quote_antislash_x_digit_in_comment bp = + parser + [ [: _ = hexa; s :] -> quote_any_in_comment bp s + | [: s :] -> comment bp s ] + and left_paren_in_comment bp = + parser + [ [: `'*'; s :] -> do { comment bp s; comment bp s } + | [: a = comment bp :] -> a ] + and star_in_comment bp = + parser + [ [: `')' :] -> () + | [: a = comment bp :] -> a ] + and linedir n s = + match stream_peek_nth n s with + [ Some (' ' | '\t') -> linedir (n + 1) s + | Some ('0'..'9') -> linedir_digits (n + 1) s + | _ -> False ] + and linedir_digits n s = + match stream_peek_nth n s with + [ Some ('0'..'9') -> linedir_digits (n + 1) s + | _ -> linedir_quote n s ] + and linedir_quote n s = + match stream_peek_nth n s with + [ Some (' ' | '\t') -> linedir_quote (n + 1) s + | Some '"' -> True + | _ -> False ] + and any_to_nl = + parser + [ [: `'\013' | '\010' :] ep -> bolpos.val := ep + | [: `_; s :] -> any_to_nl s + | [: :] -> () ] + in + fun cstrm -> + try + let glex = glexr.val in + let comm_bp = Stream.count cstrm in + let r = next_token False cstrm in + do { + match glex.tok_comm with + [ Some list -> + if fst (snd r) > comm_bp then + let comm_loc = (comm_bp, fst (snd r)) in + glex.tok_comm := Some [comm_loc :: list] + else () + | None -> () ]; + r + } + with + [ Stream.Error str -> + err (Stream.count cstrm, Stream.count cstrm + 1) str ] +; +*) + +let next_token_fun dfa ssd find_kwd bolpos glexr = let keyword_or_error loc s = try ("", find_kwd s), loc with Not_found -> if !error_on_unknown_keywords then err loc ("illegal token: " ^ s) else ("", s), loc in - let rec next_token (strm__ : _ Stream.t) = + let rec next_token after_space (strm__ : _ Stream.t) = let bp = Stream.count strm__ in match Stream.peek strm__ with - Some (' ' | '\010' | '\013' | '\t' | '\026' | '\012') -> - Stream.junk strm__; next_token strm__ + Some ('\010' | '\013') -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in bolpos := ep; next_token true s + | Some (' ' | '\t' | '\026' | '\012') -> + Stream.junk strm__; next_token true strm__ + | Some '#' when bp = !bolpos -> + Stream.junk strm__; + let s = strm__ in + if linedir 1 s then begin any_to_nl s; next_token true s end + else keyword_or_error (bp, bp + 1) "#" | Some '(' -> Stream.junk strm__; left_paren bp strm__ - | Some '#' -> - Stream.junk strm__; let s = strm__ in spaces_tabs s; linenum bp s | Some ('A'..'Z' | '\192'..'\214' | '\216'..'\222' as c) -> Stream.junk strm__; let s = strm__ in @@ -174,8 +522,8 @@ let next_token_fun dfa find_kwd = | Some '\'' -> Stream.junk strm__; let s = strm__ in - begin match Stream.npeek 3 s with - [_; '\''; _] | ['\\'; _; _] | ['\x0D'; '\x0A'; '\''] -> + begin match Stream.npeek 2 s with + [_; '\''] | ['\\'; _] -> let tok = "CHAR", get_buff (char bp 0 s) in let loc = bp, Stream.count s in tok, loc | _ -> keyword_or_error (bp, Stream.count s) "'" @@ -276,7 +624,7 @@ let next_token_fun dfa find_kwd = try match Stream.peek strm__ with Some '.' -> Stream.junk strm__; ".." - | _ -> "." + | _ -> if ssd && after_space then " ." else "." with Stream.Failure -> raise (Stream.Error "") in @@ -511,7 +859,7 @@ let next_token_fun dfa find_kwd = try comment bp strm__ with Stream.Failure -> raise (Stream.Error "") in - begin try next_token strm__ with + begin try next_token true strm__ with Stream.Failure -> raise (Stream.Error "") end | _ -> let ep = Stream.count strm__ in keyword_or_error (bp, ep) "(" @@ -533,47 +881,26 @@ let next_token_fun dfa find_kwd = and quote_in_comment bp (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '\'' -> Stream.junk strm__; comment bp strm__ - | Some '\013' -> Stream.junk strm__; quote_cr_in_comment bp strm__ - | Some '\\' -> Stream.junk strm__; quote_antislash_in_comment bp strm__ - | Some '(' -> Stream.junk strm__; quote_left_paren_in_comment bp strm__ - | Some '*' -> Stream.junk strm__; quote_star_in_comment bp strm__ - | Some '\"' -> Stream.junk strm__; quote_doublequote_in_comment bp strm__ - | Some _ -> Stream.junk strm__; quote_any_in_comment bp strm__ - | _ -> comment bp strm__ + | Some '\\' -> Stream.junk strm__; quote_antislash_in_comment bp 0 strm__ + | _ -> + let s = strm__ in + begin match Stream.npeek 2 s with + [_; '\''] -> Stream.junk s; Stream.junk s + | _ -> () + end; + comment bp s and quote_any_in_comment bp (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '\'' -> Stream.junk strm__; comment bp strm__ | _ -> comment bp strm__ - and quote_cr_in_comment bp (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some '\010' -> Stream.junk strm__; quote_any_in_comment bp strm__ - | _ -> quote_any_in_comment bp strm__ - and quote_left_paren_in_comment bp (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some '\'' -> Stream.junk strm__; comment bp strm__ - | _ -> left_paren_in_comment bp strm__ - and quote_star_in_comment bp (strm__ : _ Stream.t) = + and quote_antislash_in_comment bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '\'' -> Stream.junk strm__; comment bp strm__ - | _ -> star_in_comment bp strm__ - and quote_doublequote_in_comment bp (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some '\'' -> Stream.junk strm__; comment bp strm__ - | _ -> let _ = string bp 0 strm__ in comment bp strm__ - and quote_antislash_in_comment bp (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some '\'' -> - Stream.junk strm__; quote_antislash_quote_in_comment bp strm__ | Some ('\\' | '\"' | 'n' | 't' | 'b' | 'r') -> Stream.junk strm__; quote_any_in_comment bp strm__ | Some ('0'..'9') -> Stream.junk strm__; quote_antislash_digit_in_comment bp strm__ - | Some 'x' -> Stream.junk strm__; quote_antislash_x_in_comment bp strm__ | _ -> comment bp strm__ - and quote_antislash_quote_in_comment bp (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some '\'' -> Stream.junk strm__; comment bp strm__ - | _ -> quote_in_comment bp strm__ and quote_antislash_digit_in_comment bp (strm__ : _ Stream.t) = match Stream.peek strm__ with Some ('0'..'9') -> @@ -583,20 +910,6 @@ let next_token_fun dfa find_kwd = match Stream.peek strm__ with Some ('0'..'9') -> Stream.junk strm__; quote_any_in_comment bp strm__ | _ -> comment bp strm__ - and quote_antislash_x_in_comment bp (strm__ : _ Stream.t) = - match - try Some (hexa strm__) with - Stream.Failure -> None - with - Some _ -> quote_antislash_x_digit_in_comment bp strm__ - | _ -> comment bp strm__ - and quote_antislash_x_digit_in_comment bp (strm__ : _ Stream.t) = - match - try Some (hexa strm__) with - Stream.Failure -> None - with - Some _ -> quote_any_in_comment bp strm__ - | _ -> comment bp strm__ and left_paren_in_comment bp (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '*' -> @@ -606,54 +919,54 @@ let next_token_fun dfa find_kwd = match Stream.peek strm__ with Some ')' -> Stream.junk strm__; () | _ -> comment bp strm__ - and linenum bp (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some ('0'..'9') -> - Stream.junk strm__; - let _ = - try digits strm__ with - Stream.Failure -> raise (Stream.Error "") - in - let _ = - try spaces_tabs strm__ with - Stream.Failure -> raise (Stream.Error "") - in - begin match Stream.peek strm__ with - Some '\"' -> - Stream.junk strm__; - let _ = - try any_to_nl strm__ with - Stream.Failure -> raise (Stream.Error "") - in - next_token strm__ - | _ -> raise (Stream.Error "") - end - | _ -> keyword_or_error (bp, bp + 1) "#" - and spaces_tabs (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some (' ' | '\t') -> Stream.junk strm__; spaces_tabs strm__ - | _ -> () - and digits (strm__ : _ Stream.t) = - match Stream.peek strm__ with - Some ('0'..'9') -> Stream.junk strm__; digits strm__ - | _ -> () + and linedir n s = + match stream_peek_nth n s with + Some (' ' | '\t') -> linedir (n + 1) s + | Some ('0'..'9') -> linedir_digits (n + 1) s + | _ -> false + and linedir_digits n s = + match stream_peek_nth n s with + Some ('0'..'9') -> linedir_digits (n + 1) s + | _ -> linedir_quote n s + and linedir_quote n s = + match stream_peek_nth n s with + Some (' ' | '\t') -> linedir_quote (n + 1) s + | Some '\"' -> true + | _ -> false and any_to_nl (strm__ : _ Stream.t) = match Stream.peek strm__ with - Some ('\013' | '\010') -> Stream.junk strm__; () + Some ('\013' | '\010') -> + Stream.junk strm__; let ep = Stream.count strm__ in bolpos := ep | Some _ -> Stream.junk strm__; any_to_nl strm__ | _ -> () in fun cstrm -> - try next_token cstrm with + try + let glex = !glexr in + let comm_bp = Stream.count cstrm in + let r = next_token false cstrm in + begin match glex.tok_comm with + Some list -> + if fst (snd r) > comm_bp then + let comm_loc = comm_bp, fst (snd r) in + glex.tok_comm <- Some (comm_loc :: list) + | None -> () + end; + r + with Stream.Error str -> err (Stream.count cstrm, Stream.count cstrm + 1) str ;; + let dollar_for_antiquotation = ref true;; +let specific_space_dot = ref false;; -let func kwd_table = +let func kwd_table glexr = + let bolpos = ref 0 in let find = Hashtbl.find kwd_table in let dfa = !dollar_for_antiquotation in - Token.lexer_func_of_parser (next_token_fun dfa find) + let ssd = !specific_space_dot in + Token.lexer_func_of_parser (next_token_fun dfa ssd find bolpos glexr) ;; let rec check_keyword_stream (strm__ : _ Stream.t) = @@ -862,9 +1175,22 @@ let tok_match = let gmake () = let kwd_table = Hashtbl.create 301 in let id_table = Hashtbl.create 301 in - {tok_func = func kwd_table; tok_using = using_token kwd_table id_table; - tok_removing = removing_token kwd_table id_table; tok_match = tok_match; - tok_text = text} + let glexr = + ref + {tok_func = (fun _ -> raise (Match_failure ("plexer.ml", 952, 17))); + tok_using = (fun _ -> raise (Match_failure ("plexer.ml", 952, 37))); + tok_removing = (fun _ -> raise (Match_failure ("plexer.ml", 952, 60))); + tok_match = (fun _ -> raise (Match_failure ("plexer.ml", 953, 18))); + tok_text = (fun _ -> raise (Match_failure ("plexer.ml", 953, 37))); + tok_comm = None} + in + let glex = + {tok_func = func kwd_table glexr; + tok_using = using_token kwd_table id_table; + tok_removing = removing_token kwd_table id_table; tok_match = tok_match; + tok_text = text; tok_comm = None} + in + glexr := glex; glex ;; let tparse = @@ -883,6 +1209,15 @@ let tparse = let make () = let kwd_table = Hashtbl.create 301 in let id_table = Hashtbl.create 301 in - {func = func kwd_table; using = using_token kwd_table id_table; + let glexr = + ref + {tok_func = (fun _ -> raise (Match_failure ("plexer.ml", 981, 17))); + tok_using = (fun _ -> raise (Match_failure ("plexer.ml", 981, 37))); + tok_removing = (fun _ -> raise (Match_failure ("plexer.ml", 981, 60))); + tok_match = (fun _ -> raise (Match_failure ("plexer.ml", 982, 18))); + tok_text = (fun _ -> raise (Match_failure ("plexer.ml", 982, 37))); + tok_comm = None} + in + {func = func kwd_table glexr; using = using_token kwd_table id_table; removing = removing_token kwd_table id_table; tparse = tparse; text = text} ;; diff --git a/camlp4/ocaml_src/lib/plexer.mli b/camlp4/ocaml_src/lib/plexer.mli index d682f83ae..19bc0ce1b 100644 --- a/camlp4/ocaml_src/lib/plexer.mli +++ b/camlp4/ocaml_src/lib/plexer.mli @@ -54,6 +54,12 @@ val dollar_for_antiquotation : bool ref;; lexer where the dollar sign is used for antiquotations. If False, the dollar sign can be used as token. *) +val specific_space_dot : bool ref;; + (** When False (default), the next call to [Plexer.make ()] returns a + lexer where the dots can be preceded by spaces. If True, dots + preceded by spaces return the keyword " ." (space dot), otherwise + return the keyword "." (dot). *) + val no_quotations : bool ref;; (** When True, all lexers built by [Plexer.make ()] do not lex the quotation syntax any more. Default is False (quotations are diff --git a/camlp4/ocaml_src/lib/stdpp.ml b/camlp4/ocaml_src/lib/stdpp.ml index 0830e6842..d91ee78c0 100644 --- a/camlp4/ocaml_src/lib/stdpp.ml +++ b/camlp4/ocaml_src/lib/stdpp.ml @@ -23,23 +23,77 @@ let raise_with_loc loc exc = let line_of_loc fname (bp, ep) = try let ic = open_in_bin fname in - let rec loop lin col cnt = - if cnt < bp then - let (lin, col) = - match input_char ic with - '\n' -> lin + 1, 0 - | _ -> lin, col + 1 - in - loop lin col (cnt + 1) - else lin, col, col + ep - bp + let strm = Stream.of_channel ic in + let rec loop fname lin = + let rec not_a_line_dir col (strm__ : _ Stream.t) = + let cnt = Stream.count strm__ in + match Stream.peek strm__ with + Some c -> + Stream.junk strm__; + let s = strm__ in + if cnt < bp then + if c = '\n' then loop fname (lin + 1) + else not_a_line_dir (col + 1) s + else let col = col - (cnt - bp) in fname, lin, col, col + ep - bp + | _ -> raise Stream.Failure + in + let rec a_line_dir str n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '\n' -> Stream.junk strm__; loop str n + | Some _ -> Stream.junk strm__; a_line_dir str n (col + 1) strm__ + | _ -> raise Stream.Failure + in + let rec spaces col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some ' ' -> Stream.junk strm__; spaces (col + 1) strm__ + | _ -> col + in + let rec check_string str n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '\"' -> + Stream.junk strm__; + let col = + try spaces (col + 1) strm__ with + Stream.Failure -> raise (Stream.Error "") + in + a_line_dir str n col strm__ + | Some c when c <> '\n' -> + Stream.junk strm__; + check_string (str ^ String.make 1 c) n (col + 1) strm__ + | _ -> not_a_line_dir col strm__ + in + let check_quote n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '\"' -> Stream.junk strm__; check_string "" n (col + 1) strm__ + | _ -> not_a_line_dir col strm__ + in + let rec check_num n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some ('0'..'9' as c) -> + Stream.junk strm__; + check_num (10 * n + Char.code c - Char.code '0') (col + 1) strm__ + | _ -> let col = spaces col strm__ in check_quote n col strm__ + in + let begin_line (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '#' -> + Stream.junk strm__; + let col = + try spaces 1 strm__ with + Stream.Failure -> raise (Stream.Error "") + in + check_num 0 col strm__ + | _ -> not_a_line_dir 0 strm__ + in + begin_line strm in let r = - try loop 1 0 0 with - End_of_file -> 1, bp, ep + try loop fname 1 with + Stream.Failure -> fname, 1, bp, ep in close_in ic; r with - Sys_error _ -> 1, bp, ep + Sys_error _ -> fname, 1, bp, ep ;; let loc_name = ref "loc";; diff --git a/camlp4/ocaml_src/lib/stdpp.mli b/camlp4/ocaml_src/lib/stdpp.mli index 5afd1b2db..68c0cb6ad 100644 --- a/camlp4/ocaml_src/lib/stdpp.mli +++ b/camlp4/ocaml_src/lib/stdpp.mli @@ -25,10 +25,12 @@ val raise_with_loc : int * int -> exn -> 'a;; (** [raise_with_loc loc e], if [e] is already the exception [Exc_located], re-raise it, else raise the exception [Exc_located loc e]. *) -val line_of_loc : string -> int * int -> int * int * int;; +val line_of_loc : string -> int * int -> string * int * int * int;; (** [line_of_loc fname loc] reads the file [fname] up to the - location [loc] and returns the line number and the characters - location in the line *) + location [loc] and returns the real input file, the line number + and the characters location in the line; the real input file + can be different from [fname] because of possibility of line + directives typically generated by /lib/cpp. *) val loc_name : string ref;; (** Name of the location variable used in grammars and in the predefined diff --git a/camlp4/ocaml_src/lib/token.ml b/camlp4/ocaml_src/lib/token.ml index 63f09bc19..67aaffdec 100644 --- a/camlp4/ocaml_src/lib/token.ml +++ b/camlp4/ocaml_src/lib/token.ml @@ -26,7 +26,8 @@ type 'te glexer = tok_using : pattern -> unit; tok_removing : pattern -> unit; tok_match : pattern -> 'te -> string; - tok_text : pattern -> string } + tok_text : pattern -> string; + mutable tok_comm : location list option } ;; type lexer = { func : t lexer_func; diff --git a/camlp4/ocaml_src/lib/token.mli b/camlp4/ocaml_src/lib/token.mli index 4e14469d6..e561e28e8 100644 --- a/camlp4/ocaml_src/lib/token.mli +++ b/camlp4/ocaml_src/lib/token.mli @@ -47,7 +47,8 @@ type 'te glexer = tok_using : pattern -> unit; tok_removing : pattern -> unit; tok_match : pattern -> 'te -> string; - tok_text : pattern -> string } + tok_text : pattern -> string; + mutable tok_comm : location list option } ;; (** The type for a lexer used by Camlp4 grammars. - The field [tok_func] is the main lexer function. See [lexer_func] @@ -66,7 +67,9 @@ type 'te glexer = efficency, write it as a function returning functions according to the values of the pattern, not a function with two parameters. - The field [tok_text] returns the name of some token pattern, - used in error messages. *) + used in error messages. +- The field [tok_comm] if not None asks the lexer to record the + locations of the comments. *) val lexer_text : pattern -> string;; (** A simple [tok_text] function for lexers *) |