diff options
author | Damien Doligez <damien.doligez-inria.fr> | 1997-04-15 19:18:03 +0000 |
---|---|---|
committer | Damien Doligez <damien.doligez-inria.fr> | 1997-04-15 19:18:03 +0000 |
commit | bc679705e6573c660bbd13f8b96e515cf31069f4 (patch) | |
tree | 7b3bcab1d2408ee771641fbf898c6a26995912d2 | |
parent | 554036180b208b1047760442f294a97fc26b9b31 (diff) |
Fix pour que les lexeurs d'ocamllex sachent traiter le '\000'.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@1501 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | byterun/lexing.c | 18 | ||||
-rw-r--r-- | lex/compact.ml | 8 | ||||
-rw-r--r-- | lex/lexer.mll | 6 | ||||
-rw-r--r-- | lex/lexgen.ml | 10 | ||||
-rw-r--r-- | lex/parser.mly | 14 | ||||
-rw-r--r-- | lex/syntax.mli | 3 |
6 files changed, 33 insertions, 26 deletions
diff --git a/byterun/lexing.c b/byterun/lexing.c index df7ff9b61..3efd46804 100644 --- a/byterun/lexing.c +++ b/byterun/lexing.c @@ -27,6 +27,7 @@ struct lexer_buffer { value lex_curr_pos; value lex_last_pos; value lex_last_action; + value lex_eof_reached; }; struct lexing_table { @@ -72,11 +73,18 @@ value lex_engine(tbl, start_state, lexbuf) /* ML */ lexbuf->lex_last_action = Val_int(backtrk); } /* See if we need a refill */ - if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len) - return Val_int(-state - 1); - /* Read next input char */ - c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos)); - lexbuf->lex_curr_pos += 2; + if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){ + if (lexbuf->lex_eof_reached == Val_int (0)){ + return Val_int(-state - 1); + }else{ + c = 256; + lexbuf->lex_eof_reached = Val_int (0); + } + }else{ + /* Read next input char */ + c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos)); + lexbuf->lex_curr_pos += 2; + } /* Determine next state */ if (Short(tbl->lex_check, base + c) == state) state = Short(tbl->lex_trans, base + c); diff --git a/lex/compact.ml b/lex/compact.ml index aafa76738..506191b78 100644 --- a/lex/compact.ml +++ b/lex/compact.ml @@ -59,8 +59,8 @@ let grow_transitions () = Array.blit old_check 0 !check 0 !last_used let pack_moves state_num move_t = - let move_v = Array.create 256 0 in - for i = 0 to 255 do + let move_v = Array.create 257 0 in + for i = 0 to 256 do move_v.(i) <- (match move_t.(i) with Backtrack -> -1 @@ -69,7 +69,7 @@ let pack_moves state_num move_t = let default = most_frequent_elt move_v in let nondef = non_default_elements default move_v in let rec pack_from b = - while b + 256 > Array.length !trans do grow_transitions() done; + while b + 257 > Array.length !trans do grow_transitions() done; let rec try_pack = function [] -> b | (pos, v) :: rem -> @@ -81,7 +81,7 @@ let pack_moves state_num move_t = !trans.(base + pos) <- v; !check.(base + pos) <- state_num) nondef; - if base + 256 > !last_used then last_used := base + 256; + if base + 257 > !last_used then last_used := base + 257; (base, default) (* Build the tables *) diff --git a/lex/lexer.mll b/lex/lexer.mll index 4b0e90ab1..c3f054f13 100644 --- a/lex/lexer.mll +++ b/lex/lexer.mll @@ -77,11 +77,11 @@ rule main = parse string lexbuf; Tstring(get_stored_string()) } | "'" [^ '\\'] "'" - { Tchar(Lexing.lexeme_char lexbuf 1) } + { Tchar(Char.code(Lexing.lexeme_char lexbuf 1)) } | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" - { Tchar(char_for_backslash (Lexing.lexeme_char lexbuf 2)) } + { Tchar(Char.code(char_for_backslash (Lexing.lexeme_char lexbuf 2))) } | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" - { Tchar(char_for_decimal_code lexbuf 2) } + { Tchar(Char.code(char_for_decimal_code lexbuf 2)) } | '{' { let n1 = Lexing.lexeme_end lexbuf in brace_depth := 1; diff --git a/lex/lexgen.ml b/lex/lexgen.ml index 80ada9789..5843c18ef 100644 --- a/lex/lexgen.ml +++ b/lex/lexgen.ml @@ -51,7 +51,7 @@ type automata_entry = (* From shallow to deep syntax *) -let chars = ref ([] : char list list) +let chars = ref ([] : int list list) let chars_count = ref 0 let actions = ref ([] : (int * location) list) let actions_count = ref 0 @@ -203,16 +203,16 @@ let goto_state st = if TransSet.is_empty st then Backtrack else Goto (get_state st) let transition_from chars follow pos_set = - let tr = Array.create 256 TransSet.empty in - let shift = Array.create 256 Backtrack in + let tr = Array.create 257 TransSet.empty in + let shift = Array.create 257 Backtrack in List.iter (fun pos -> List.iter (fun c -> - tr.(Char.code c) <- TransSet.union tr.(Char.code c) follow.(pos)) + tr.(c) <- TransSet.union tr.(c) follow.(pos)) chars.(pos)) pos_set; - for i = 0 to 255 do + for i = 0 to 256 do shift.(i) <- goto_state tr.(i) done; shift diff --git a/lex/parser.mly b/lex/parser.mly index 819f4f090..044f2b561 100644 --- a/lex/parser.mly +++ b/lex/parser.mly @@ -21,16 +21,16 @@ open Syntax let regexp_for_string s = let rec re_string n = if n >= String.length s then Epsilon - else if succ n = String.length s then Characters([s.[n]]) - else Sequence(Characters([s.[n]]), re_string (succ n)) + else if succ n = String.length s then Characters([Char.code (s.[n])]) + else Sequence(Characters([Char.code (s.[n])]), re_string (succ n)) in re_string 0 let char_class c1 c2 = let rec cl n = - if n > (Char.code c2) then [] else (Char.chr n) :: cl(succ n) - in cl (Char.code c1) + if n > c2 then [] else n :: cl(succ n) + in cl c1 -let all_chars = char_class (Char.chr 1) (Char.chr 255) +let all_chars = char_class 0 255 let rec subtract l1 l2 = match l1 with @@ -39,7 +39,7 @@ let rec subtract l1 l2 = %} %token <string> Tident -%token <char> Tchar +%token <int> Tchar %token <string> Tstring %token <Syntax.location> Taction %token Trule Tparse Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket @@ -98,7 +98,7 @@ regexp: Tunderscore { Characters all_chars } | Teof - { Characters ['\000'] } + { Characters [256] } | Tchar { Characters [$1] } | Tstring diff --git a/lex/syntax.mli b/lex/syntax.mli index 48f971709..6e3025428 100644 --- a/lex/syntax.mli +++ b/lex/syntax.mli @@ -18,7 +18,7 @@ type location = type regular_expression = Epsilon - | Characters of char list + | Characters of int list | Sequence of regular_expression * regular_expression | Alternative of regular_expression * regular_expression | Repetition of regular_expression @@ -27,4 +27,3 @@ type lexer_definition = { header: location; entrypoints: (string * (regular_expression * location) list) list; trailer: location } - |