diff options
author | Luc Maranget <luc.maranget@inria.fr> | 2007-01-29 16:44:16 +0000 |
---|---|---|
committer | Luc Maranget <luc.maranget@inria.fr> | 2007-01-29 16:44:16 +0000 |
commit | 5ca2e4ee42ddee9b8c8f27df4c3216210210bbe5 (patch) | |
tree | 83f1f26c8ea4edb1680b53d5f56cd2ad73972626 | |
parent | 0eaf3a256c26dd8796b9892ad61a421be94cd4f8 (diff) |
ocamllex: as bound variables with position in .mll file
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@7815 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | lex/common.ml | 26 | ||||
-rw-r--r-- | lex/common.mli | 4 | ||||
-rw-r--r-- | lex/lexgen.ml | 101 | ||||
-rw-r--r-- | lex/lexgen.mli | 4 | ||||
-rw-r--r-- | lex/output.ml | 2 | ||||
-rw-r--r-- | lex/outputbis.ml | 2 | ||||
-rw-r--r-- | lex/parser.mly | 9 | ||||
-rw-r--r-- | lex/syntax.ml | 2 | ||||
-rw-r--r-- | lex/syntax.mli | 2 |
9 files changed, 89 insertions, 63 deletions
diff --git a/lex/common.ml b/lex/common.ml index f56e7a869..cacea62d0 100644 --- a/lex/common.ml +++ b/lex/common.ml @@ -122,26 +122,36 @@ let output_tag_access oc = function | Sum (a,i) -> fprintf oc "(%a + %d)" output_base_mem a i -let output_env oc env = +let output_env sourcefile ic oc tr env = let pref = ref "let" in match env with | [] -> () - | _ -> + | _ -> + (* Probably, we are better with variables sorted + in apparition order *) + let env = + List.sort + (fun ((_,p1),_) ((_,p2),_) -> + Pervasives.compare p1.start_pos p2.start_pos) + env in + List.iter - (fun (x,v) -> + (fun ((x,pos),v) -> + fprintf oc "%s\n" !pref ; + copy_chunk sourcefile ic oc tr pos false ; begin match v with | Ident_string (o,nstart,nend) -> fprintf oc - "\n %s %s = Lexing.sub_lexeme%s lexbuf %a %a" - !pref x (if o then "_opt" else "") + "= Lexing.sub_lexeme%s lexbuf %a %a" + (if o then "_opt" else "") output_tag_access nstart output_tag_access nend | Ident_char (o,nstart) -> fprintf oc - "\n %s %s = Lexing.sub_lexeme_char%s lexbuf %a" - !pref x (if o then "_opt" else "") + "= Lexing.sub_lexeme_char%s lexbuf %a" + (if o then "_opt" else "") output_tag_access nstart end ; - pref := "and") + pref := "\nand") env ; fprintf oc " in\n" diff --git a/lex/common.mli b/lex/common.mli index 4210d21d8..e5742b45b 100644 --- a/lex/common.mli +++ b/lex/common.mli @@ -19,7 +19,9 @@ val copy_chunk : val output_mem_access : out_channel -> int -> unit val output_memory_actions : string -> out_channel -> Lexgen.memory_action list -> unit -val output_env : out_channel -> (string * Lexgen.ident_info) list -> unit +val output_env : + string -> in_channel -> out_channel -> line_tracker -> + (Lexgen.ident * Lexgen.ident_info) list -> unit val output_args : out_channel -> string list -> unit val quiet_mode : bool ref;; diff --git a/lex/lexgen.ml b/lex/lexgen.ml index 8d665e77e..ce4d649d2 100644 --- a/lex/lexgen.ml +++ b/lex/lexgen.ml @@ -23,7 +23,9 @@ exception Memory_overflow (* Deep abstract syntax for regular expressions *) -type tag_info = {id : string ; start : bool ; action : int} +type ident = string * Syntax.location + +type tag_info = {id : ident ; start : bool ; action : int} type regexp = Empty @@ -39,7 +41,7 @@ type tag_addr = Sum of (tag_base * int) type ident_info = | Ident_string of bool * tag_addr * tag_addr | Ident_char of bool * tag_addr -type t_env = (string * ident_info) list +type t_env = (ident * ident_info) list type ('args,'action) lexer_entry = { lex_name: string; @@ -85,10 +87,13 @@ module Tags = Set.Make(struct type t = tag_info let compare = compare end) module TagMap = Map.Make (struct type t = tag_info let compare = compare end) -module StringSet = - Set.Make (struct type t = string let compare = Pervasives.compare end) -module StringMap = - Map.Make (struct type t = string let compare = Pervasives.compare end) +let id_compare (id1,_) (id2,_) = String.compare id1 id2 + +module IdSet = + Set.Make (struct type t = ident let compare = id_compare end) + +module IdMap = + Map.Make (struct type t = ident let compare = id_compare end) (*********************) (* Variable cleaning *) @@ -98,10 +103,10 @@ module StringMap = let rec do_remove_nested to_remove = function | Bind (e,x) -> - if StringSet.mem x to_remove then + if IdSet.mem x to_remove then do_remove_nested to_remove e else - Bind (do_remove_nested (StringSet.add x to_remove) e, x) + Bind (do_remove_nested (IdSet.add x to_remove) e, x) | Epsilon|Eof|Characters _ as e -> e | Sequence (e1, e2) -> Sequence @@ -112,7 +117,7 @@ let rec do_remove_nested to_remove = function | Repetition e -> Repetition (do_remove_nested to_remove e) -let remove_nested_as e = do_remove_nested StringSet.empty e +let remove_nested_as e = do_remove_nested IdSet.empty e (*********************) (* Variable analysis *) @@ -128,36 +133,36 @@ let remove_nested_as e = do_remove_nested StringSet.empty e *) let stringset_delta s1 s2 = - StringSet.union - (StringSet.diff s1 s2) - (StringSet.diff s2 s1) + IdSet.union + (IdSet.diff s1 s2) + (IdSet.diff s2 s1) let rec find_all_vars = function | Characters _|Epsilon|Eof -> - StringSet.empty + IdSet.empty | Bind (e,x) -> - StringSet.add x (find_all_vars e) + IdSet.add x (find_all_vars e) | Sequence (e1,e2)|Alternative (e1,e2) -> - StringSet.union (find_all_vars e1) (find_all_vars e2) + IdSet.union (find_all_vars e1) (find_all_vars e2) | Repetition e -> find_all_vars e let rec do_find_opt = function - | Characters _|Epsilon|Eof -> StringSet.empty, StringSet.empty + | Characters _|Epsilon|Eof -> IdSet.empty, IdSet.empty | Bind (e,x) -> let opt,all = do_find_opt e in - opt, StringSet.add x all + opt, IdSet.add x all | Sequence (e1,e2) -> let opt1,all1 = do_find_opt e1 and opt2,all2 = do_find_opt e2 in - StringSet.union opt1 opt2, StringSet.union all1 all2 + IdSet.union opt1 opt2, IdSet.union all1 all2 | Alternative (e1,e2) -> let opt1,all1 = do_find_opt e1 and opt2,all2 = do_find_opt e2 in - StringSet.union - (StringSet.union opt1 opt2) + IdSet.union + (IdSet.union opt1 opt2) (stringset_delta all1 all2), - StringSet.union all1 all2 + IdSet.union all1 all2 | Repetition e -> let r = find_all_vars e in r,r @@ -175,26 +180,26 @@ let find_optional e = *) let rec do_find_double = function - | Characters _|Epsilon|Eof -> StringSet.empty, StringSet.empty + | Characters _|Epsilon|Eof -> IdSet.empty, IdSet.empty | Bind (e,x) -> let dbl,all = do_find_double e in - (if StringSet.mem x all then - StringSet.add x dbl + (if IdSet.mem x all then + IdSet.add x dbl else dbl), - StringSet.add x all + IdSet.add x all | Sequence (e1,e2) -> let dbl1, all1 = do_find_double e1 and dbl2, all2 = do_find_double e2 in - StringSet.union - (StringSet.inter all1 all2) - (StringSet.union dbl1 dbl2), - StringSet.union all1 all2 + IdSet.union + (IdSet.inter all1 all2) + (IdSet.union dbl1 dbl2), + IdSet.union all1 all2 | Alternative (e1,e2) -> let dbl1, all1 = do_find_double e1 and dbl2, all2 = do_find_double e2 in - StringSet.union dbl1 dbl2, - StringSet.union all1 all2 + IdSet.union dbl1 dbl2, + IdSet.union all1 all2 | Repetition e -> let r = find_all_vars e in r,r @@ -218,27 +223,27 @@ let add_some_some x y = match x,y with | _,_ -> None let rec do_find_chars sz = function - | Epsilon|Eof -> StringSet.empty, StringSet.empty, sz - | Characters _ -> StringSet.empty, StringSet.empty, add_some 1 sz + | Epsilon|Eof -> IdSet.empty, IdSet.empty, sz + | Characters _ -> IdSet.empty, IdSet.empty, add_some 1 sz | Bind (e,x) -> let c,s,e_sz = do_find_chars (Some 0) e in begin match e_sz with | Some 1 -> - StringSet.add x c,s,add_some 1 sz + IdSet.add x c,s,add_some 1 sz | _ -> - c, StringSet.add x s, add_some_some sz e_sz + c, IdSet.add x s, add_some_some sz e_sz end | Sequence (e1,e2) -> let c1,s1,sz1 = do_find_chars sz e1 in let c2,s2,sz2 = do_find_chars sz1 e2 in - StringSet.union c1 c2, - StringSet.union s1 s2, + IdSet.union c1 c2, + IdSet.union s1 s2, sz2 | Alternative (e1,e2) -> let c1,s1,sz1 = do_find_chars sz e1 and c2,s2,sz2 = do_find_chars sz e2 in - StringSet.union c1 c2, - StringSet.union s1 s2, + IdSet.union c1 c2, + IdSet.union s1 s2, (if sz1 = sz2 then sz1 else None) | Repetition e -> do_find_chars None e @@ -246,7 +251,7 @@ let rec do_find_chars sz = function let find_chars e = let c,s,_ = do_find_chars (Some 0) e in - StringSet.diff c s + IdSet.diff c s (*******************************) (* From shallow to deep syntax *) @@ -281,7 +286,7 @@ let rec encode_regexp char_vars act = function Star r | Bind (r,x) -> let r = encode_regexp char_vars act r in - if StringSet.mem x char_vars then + if IdSet.mem x char_vars then Seq (Tag {id=x ; start=true ; action=act},r) else Seq (Tag {id=x ; start=true ; action=act}, @@ -340,7 +345,7 @@ let opt_regexp all_vars char_vars optional_vars double_vars r = let rec simple_forward pos r = match r with | Tag n -> - if StringSet.mem n.id double_vars then + if IdSet.mem n.id double_vars then r,Some pos else begin Hashtbl.add env (n.id,n.start) (Sum (Start, pos)) ; @@ -383,7 +388,7 @@ let opt_regexp all_vars char_vars optional_vars double_vars r = let rec simple_backward pos r = match r with | Tag n -> - if StringSet.mem n.id double_vars then + if IdSet.mem n.id double_vars then r,Some pos else begin Hashtbl.add env (n.id,n.start) (Sum (End, pos)) ; @@ -428,7 +433,7 @@ let opt_regexp all_vars char_vars optional_vars double_vars r = let rec alloc_exp pos r = match r with | Tag n -> - if StringSet.mem n.id double_vars then + if IdSet.mem n.id double_vars then r,pos else begin match pos with | Some a -> @@ -456,15 +461,15 @@ let opt_regexp all_vars char_vars optional_vars double_vars r = let r,_ = alloc_exp None r in let m = - StringSet.fold + IdSet.fold (fun x r -> let v = - if StringSet.mem x char_vars then + if IdSet.mem x char_vars then Ident_char - (StringSet.mem x optional_vars, get_tag_addr (x,true)) + (IdSet.mem x optional_vars, get_tag_addr (x,true)) else Ident_string - (StringSet.mem x optional_vars, + (IdSet.mem x optional_vars, get_tag_addr (x,true), get_tag_addr (x,false)) in (x,v)::r) diff --git a/lex/lexgen.mli b/lex/lexgen.mli index cd7f6474a..5c9c1bc75 100644 --- a/lex/lexgen.mli +++ b/lex/lexgen.mli @@ -35,6 +35,7 @@ and memory_action = and tag_action = SetTag of int * int | EraseTag of int +type ident = string * Syntax.location (* Representation of entry points *) type tag_base = Start | End | Mem of int @@ -42,7 +43,8 @@ type tag_addr = Sum of (tag_base * int) type ident_info = | Ident_string of bool * tag_addr * tag_addr | Ident_char of bool * tag_addr -type t_env = (string * ident_info) list + +type t_env = (ident * ident_info) list type ('args,'action) automata_entry = { auto_name: string; diff --git a/lex/output.ml b/lex/output.ml index c911c95c4..4f15d0c1e 100644 --- a/lex/output.ml +++ b/lex/output.ml @@ -95,7 +95,7 @@ let output_entry sourcefile ic oc oci e = (fun (num, env, loc) -> fprintf oc " | "; fprintf oc "%d ->\n" num; - output_env oc env; + output_env sourcefile ic oc oci env; copy_chunk sourcefile ic oc oci loc true; fprintf oc "\n") e.auto_actions; diff --git a/lex/outputbis.ml b/lex/outputbis.ml index be1c6af5d..336896a4a 100644 --- a/lex/outputbis.ml +++ b/lex/outputbis.ml @@ -168,7 +168,7 @@ let output_entry sourcefile ic oc tr e = (fun (num, env, loc) -> fprintf oc " | "; fprintf oc "%d ->\n" num; - output_env oc env ; + output_env sourcefile ic oc tr env ; copy_chunk sourcefile ic oc tr loc true; fprintf oc "\n") e.auto_actions; diff --git a/lex/parser.mly b/lex/parser.mly index 19541d19d..dd818e784 100644 --- a/lex/parser.mly +++ b/lex/parser.mly @@ -160,7 +160,14 @@ regexp: $1; exit 2 } | regexp Tas ident - {Bind ($1, $3)} + {let p1 = Parsing.rhs_start_pos 3 + and p2 = Parsing.rhs_end_pos 3 in + let p = { + start_pos = p1.Lexing.pos_cnum ; + end_pos = p2.Lexing.pos_cnum ; + start_line = p1.Lexing.pos_lnum ; + start_col = p1.Lexing.pos_cnum - p1.Lexing.pos_bol ; } in + Bind ($1, ($3, p))} ; ident: diff --git a/lex/syntax.ml b/lex/syntax.ml index e5b3f219b..d1daa02db 100644 --- a/lex/syntax.ml +++ b/lex/syntax.ml @@ -30,7 +30,7 @@ type regular_expression = | Sequence of regular_expression * regular_expression | Alternative of regular_expression * regular_expression | Repetition of regular_expression - | Bind of regular_expression * string + | Bind of regular_expression * (string * location) type ('arg,'action) entry = {name:string ; diff --git a/lex/syntax.mli b/lex/syntax.mli index 368a5d0ba..4864b50ee 100644 --- a/lex/syntax.mli +++ b/lex/syntax.mli @@ -27,7 +27,7 @@ type regular_expression = | Sequence of regular_expression * regular_expression | Alternative of regular_expression * regular_expression | Repetition of regular_expression - | Bind of regular_expression * string + | Bind of regular_expression * (string * location) type ('arg,'action) entry = {name:string ; |