summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamien Doligez <damien.doligez-inria.fr>1997-04-15 19:18:03 +0000
committerDamien Doligez <damien.doligez-inria.fr>1997-04-15 19:18:03 +0000
commitbc679705e6573c660bbd13f8b96e515cf31069f4 (patch)
tree7b3bcab1d2408ee771641fbf898c6a26995912d2
parent554036180b208b1047760442f294a97fc26b9b31 (diff)
Fix pour que les lexeurs d'ocamllex sachent traiter le '\000'.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@1501 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r--byterun/lexing.c18
-rw-r--r--lex/compact.ml8
-rw-r--r--lex/lexer.mll6
-rw-r--r--lex/lexgen.ml10
-rw-r--r--lex/parser.mly14
-rw-r--r--lex/syntax.mli3
6 files changed, 33 insertions, 26 deletions
diff --git a/byterun/lexing.c b/byterun/lexing.c
index df7ff9b61..3efd46804 100644
--- a/byterun/lexing.c
+++ b/byterun/lexing.c
@@ -27,6 +27,7 @@ struct lexer_buffer {
value lex_curr_pos;
value lex_last_pos;
value lex_last_action;
+ value lex_eof_reached;
};
struct lexing_table {
@@ -72,11 +73,18 @@ value lex_engine(tbl, start_state, lexbuf) /* ML */
lexbuf->lex_last_action = Val_int(backtrk);
}
/* See if we need a refill */
- if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len)
- return Val_int(-state - 1);
- /* Read next input char */
- c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
- lexbuf->lex_curr_pos += 2;
+ if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
+ if (lexbuf->lex_eof_reached == Val_int (0)){
+ return Val_int(-state - 1);
+ }else{
+ c = 256;
+ lexbuf->lex_eof_reached = Val_int (0);
+ }
+ }else{
+ /* Read next input char */
+ c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
+ lexbuf->lex_curr_pos += 2;
+ }
/* Determine next state */
if (Short(tbl->lex_check, base + c) == state)
state = Short(tbl->lex_trans, base + c);
diff --git a/lex/compact.ml b/lex/compact.ml
index aafa76738..506191b78 100644
--- a/lex/compact.ml
+++ b/lex/compact.ml
@@ -59,8 +59,8 @@ let grow_transitions () =
Array.blit old_check 0 !check 0 !last_used
let pack_moves state_num move_t =
- let move_v = Array.create 256 0 in
- for i = 0 to 255 do
+ let move_v = Array.create 257 0 in
+ for i = 0 to 256 do
move_v.(i) <-
(match move_t.(i) with
Backtrack -> -1
@@ -69,7 +69,7 @@ let pack_moves state_num move_t =
let default = most_frequent_elt move_v in
let nondef = non_default_elements default move_v in
let rec pack_from b =
- while b + 256 > Array.length !trans do grow_transitions() done;
+ while b + 257 > Array.length !trans do grow_transitions() done;
let rec try_pack = function
[] -> b
| (pos, v) :: rem ->
@@ -81,7 +81,7 @@ let pack_moves state_num move_t =
!trans.(base + pos) <- v;
!check.(base + pos) <- state_num)
nondef;
- if base + 256 > !last_used then last_used := base + 256;
+ if base + 257 > !last_used then last_used := base + 257;
(base, default)
(* Build the tables *)
diff --git a/lex/lexer.mll b/lex/lexer.mll
index 4b0e90ab1..c3f054f13 100644
--- a/lex/lexer.mll
+++ b/lex/lexer.mll
@@ -77,11 +77,11 @@ rule main = parse
string lexbuf;
Tstring(get_stored_string()) }
| "'" [^ '\\'] "'"
- { Tchar(Lexing.lexeme_char lexbuf 1) }
+ { Tchar(Char.code(Lexing.lexeme_char lexbuf 1)) }
| "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
- { Tchar(char_for_backslash (Lexing.lexeme_char lexbuf 2)) }
+ { Tchar(Char.code(char_for_backslash (Lexing.lexeme_char lexbuf 2))) }
| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
- { Tchar(char_for_decimal_code lexbuf 2) }
+ { Tchar(Char.code(char_for_decimal_code lexbuf 2)) }
| '{'
{ let n1 = Lexing.lexeme_end lexbuf in
brace_depth := 1;
diff --git a/lex/lexgen.ml b/lex/lexgen.ml
index 80ada9789..5843c18ef 100644
--- a/lex/lexgen.ml
+++ b/lex/lexgen.ml
@@ -51,7 +51,7 @@ type automata_entry =
(* From shallow to deep syntax *)
-let chars = ref ([] : char list list)
+let chars = ref ([] : int list list)
let chars_count = ref 0
let actions = ref ([] : (int * location) list)
let actions_count = ref 0
@@ -203,16 +203,16 @@ let goto_state st =
if TransSet.is_empty st then Backtrack else Goto (get_state st)
let transition_from chars follow pos_set =
- let tr = Array.create 256 TransSet.empty in
- let shift = Array.create 256 Backtrack in
+ let tr = Array.create 257 TransSet.empty in
+ let shift = Array.create 257 Backtrack in
List.iter
(fun pos ->
List.iter
(fun c ->
- tr.(Char.code c) <- TransSet.union tr.(Char.code c) follow.(pos))
+ tr.(c) <- TransSet.union tr.(c) follow.(pos))
chars.(pos))
pos_set;
- for i = 0 to 255 do
+ for i = 0 to 256 do
shift.(i) <- goto_state tr.(i)
done;
shift
diff --git a/lex/parser.mly b/lex/parser.mly
index 819f4f090..044f2b561 100644
--- a/lex/parser.mly
+++ b/lex/parser.mly
@@ -21,16 +21,16 @@ open Syntax
let regexp_for_string s =
let rec re_string n =
if n >= String.length s then Epsilon
- else if succ n = String.length s then Characters([s.[n]])
- else Sequence(Characters([s.[n]]), re_string (succ n))
+ else if succ n = String.length s then Characters([Char.code (s.[n])])
+ else Sequence(Characters([Char.code (s.[n])]), re_string (succ n))
in re_string 0
let char_class c1 c2 =
let rec cl n =
- if n > (Char.code c2) then [] else (Char.chr n) :: cl(succ n)
- in cl (Char.code c1)
+ if n > c2 then [] else n :: cl(succ n)
+ in cl c1
-let all_chars = char_class (Char.chr 1) (Char.chr 255)
+let all_chars = char_class 0 255
let rec subtract l1 l2 =
match l1 with
@@ -39,7 +39,7 @@ let rec subtract l1 l2 =
%}
%token <string> Tident
-%token <char> Tchar
+%token <int> Tchar
%token <string> Tstring
%token <Syntax.location> Taction
%token Trule Tparse Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket
@@ -98,7 +98,7 @@ regexp:
Tunderscore
{ Characters all_chars }
| Teof
- { Characters ['\000'] }
+ { Characters [256] }
| Tchar
{ Characters [$1] }
| Tstring
diff --git a/lex/syntax.mli b/lex/syntax.mli
index 48f971709..6e3025428 100644
--- a/lex/syntax.mli
+++ b/lex/syntax.mli
@@ -18,7 +18,7 @@ type location =
type regular_expression =
Epsilon
- | Characters of char list
+ | Characters of int list
| Sequence of regular_expression * regular_expression
| Alternative of regular_expression * regular_expression
| Repetition of regular_expression
@@ -27,4 +27,3 @@ type lexer_definition =
{ header: location;
entrypoints: (string * (regular_expression * location) list) list;
trailer: location }
-