summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lex/lexer.mli5
-rw-r--r--lex/lexer.mll65
-rw-r--r--lex/main.ml19
-rw-r--r--lex/output.ml36
-rw-r--r--lex/output.mli2
-rw-r--r--lex/parser.mly2
-rw-r--r--lex/syntax.mli5
-rw-r--r--yacc/reader.c12
8 files changed, 97 insertions, 49 deletions
diff --git a/lex/lexer.mli b/lex/lexer.mli
index eda622fa9..da09bc794 100644
--- a/lex/lexer.mli
+++ b/lex/lexer.mli
@@ -13,4 +13,7 @@
val main: Lexing.lexbuf -> Parser.token
-exception Lexical_error of string
+exception Lexical_error of string * int * int
+
+val line_num: int ref
+val line_start_pos: int ref
diff --git a/lex/lexer.mll b/lex/lexer.mll
index 9e8b109c4..cbfbf011d 100644
--- a/lex/lexer.mll
+++ b/lex/lexer.mll
@@ -22,7 +22,7 @@ open Parser
let brace_depth = ref 0
and comment_depth = ref 0
-exception Lexical_error of string
+exception Lexical_error of string * int * int
let initial_string_buffer = String.create 256
let string_buff = ref initial_string_buffer
@@ -56,14 +56,28 @@ let char_for_decimal_code lexbuf i =
10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
(Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48))
+let line_num = ref 1
+let line_start_pos = ref 0
+
+let handle_lexical_error fn lexbuf =
+ let line = !line_num
+ and column = Lexing.lexeme_start lexbuf - !line_start_pos in
+ try
+ fn lexbuf
+ with Lexical_error(msg, _, _) ->
+ raise(Lexical_error(msg, line, column))
}
rule main = parse
- [' ' '\010' '\013' '\009' '\012' ] +
+ [' ' '\013' '\009' '\012' ] +
{ main lexbuf }
+ | '\010'
+ { line_start_pos := Lexing.lexeme_end lexbuf;
+ incr line_num;
+ main lexbuf }
| "(*"
{ comment_depth := 1;
- comment lexbuf;
+ handle_lexical_error comment lexbuf;
main lexbuf }
| ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
{ match Lexing.lexeme lexbuf with
@@ -75,7 +89,7 @@ rule main = parse
| s -> Tident s }
| '"'
{ reset_string_buffer();
- string lexbuf;
+ handle_lexical_error string lexbuf;
Tstring(get_stored_string()) }
| "'" [^ '\\'] "'"
{ Tchar(Char.code(Lexing.lexeme_char lexbuf 1)) }
@@ -84,10 +98,13 @@ rule main = parse
| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
{ Tchar(Char.code(char_for_decimal_code lexbuf 2)) }
| '{'
- { let n1 = Lexing.lexeme_end lexbuf in
- brace_depth := 1;
- let n2 = action lexbuf in
- Taction(Location(n1, n2)) }
+ { let n1 = Lexing.lexeme_end lexbuf
+ and l1 = !line_num
+ and s1 = !line_start_pos in
+ brace_depth := 1;
+ let n2 = handle_lexical_error action lexbuf in
+ Taction({start_pos = n1; end_pos = n2;
+ start_line = l1; start_col = n1 - s1}) }
| '=' { Tequal }
| '|' { Tor }
| '_' { Tunderscore }
@@ -103,7 +120,8 @@ rule main = parse
| eof { Tend }
| _
{ raise(Lexical_error
- ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf))) }
+ ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
+ !line_num, Lexing.lexeme_start lexbuf - !line_start_pos)) }
and action = parse
'{'
@@ -111,7 +129,7 @@ and action = parse
action lexbuf }
| '}'
{ decr brace_depth;
- if !brace_depth == 0 then Lexing.lexeme_start lexbuf else action lexbuf }
+ if !brace_depth = 0 then Lexing.lexeme_start lexbuf else action lexbuf }
| '"'
{ reset_string_buffer();
string lexbuf;
@@ -128,15 +146,21 @@ and action = parse
comment lexbuf;
action lexbuf }
| eof
- { raise (Lexical_error "unterminated action") }
+ { raise (Lexical_error("unterminated action", 0, 0)) }
+ | '\010'
+ { line_start_pos := Lexing.lexeme_end lexbuf;
+ incr line_num;
+ action lexbuf }
| _
{ action lexbuf }
and string = parse
'"'
{ () }
- | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] +
- { string lexbuf }
+ | '\\' [' ' '\013' '\009' '\012'] * '\010' [' ' '\013' '\009' '\012'] *
+ { line_start_pos := Lexing.lexeme_end lexbuf;
+ incr line_num;
+ string lexbuf }
| '\\' ['\\' '"' 'n' 't' 'b' 'r']
{ store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
string lexbuf }
@@ -144,7 +168,12 @@ and string = parse
{ store_string_char(char_for_decimal_code lexbuf 1);
string lexbuf }
| eof
- { raise(Lexical_error "unterminated string") }
+ { raise(Lexical_error("unterminated string", 0, 0)) }
+ | '\010'
+ { store_string_char '\010';
+ line_start_pos := Lexing.lexeme_end lexbuf;
+ incr line_num;
+ string lexbuf }
| _
{ store_string_char(Lexing.lexeme_char lexbuf 0);
string lexbuf }
@@ -154,7 +183,7 @@ and comment = parse
{ incr comment_depth; comment lexbuf }
| "*)"
{ decr comment_depth;
- if !comment_depth == 0 then () else comment lexbuf }
+ if !comment_depth = 0 then () else comment lexbuf }
| '"'
{ reset_string_buffer();
string lexbuf;
@@ -169,6 +198,10 @@ and comment = parse
| "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
{ comment lexbuf }
| eof
- { raise(Lexical_error "unterminated comment") }
+ { raise(Lexical_error("unterminated comment", 0, 0)) }
+ | '\010'
+ { line_start_pos := Lexing.lexeme_end lexbuf;
+ incr line_num;
+ comment lexbuf }
| _
{ comment lexbuf }
diff --git a/lex/main.ml b/lex/main.ml
index cd7d02e6c..a56041602 100644
--- a/lex/main.ml
+++ b/lex/main.ml
@@ -39,21 +39,20 @@ let main () =
Sys.remove dest_name;
begin match exn with
Parsing.Parse_error ->
- prerr_string "Syntax error around char ";
- prerr_int (Lexing.lexeme_start lexbuf);
- prerr_endline "."
- | Lexer.Lexical_error s ->
- prerr_string "Lexical error around char ";
- prerr_int (Lexing.lexeme_start lexbuf);
- prerr_string ": ";
- prerr_string s;
- prerr_endline "."
+ Printf.fprintf stderr
+ "File \"%s\", line %d, character %d: syntax error.\n"
+ source_name !Lexer.line_num
+ (Lexing.lexeme_start lexbuf - !Lexer.line_start_pos)
+ | Lexer.Lexical_error(msg, line, col) ->
+ Printf.fprintf stderr
+ "File \"%s\", line %d, character %d: %s.\n"
+ source_name line col msg
| _ -> raise exn
end;
exit 2 in
let (entries, transitions) = Lexgen.make_dfa def in
let tables = Compact.compact_tables transitions in
- Output.output_lexdef ic oc def.header tables entries def.trailer;
+ Output.output_lexdef source_name ic oc def.header tables entries def.trailer;
close_in ic;
close_out oc
diff --git a/lex/output.ml b/lex/output.ml
index c5426905b..9e2504bcd 100644
--- a/lex/output.ml
+++ b/lex/output.ml
@@ -22,8 +22,7 @@ open Compact
let copy_buffer = String.create 1024
-let copy_chunk_unix ic oc (Location(start,stop)) =
- seek_in ic start;
+let copy_chars_unix ic oc start stop =
let n = ref (stop - start) in
while !n > 0 do
let m = input ic copy_buffer 0 (min !n 1024) in
@@ -31,17 +30,24 @@ let copy_chunk_unix ic oc (Location(start,stop)) =
n := !n - m
done
-let copy_chunk_win32 ic oc (Location(start,stop)) =
- seek_in ic start;
+let copy_chars_win32 ic oc start stop =
for i = start to stop - 1 do
let c = input_char ic in
if c <> '\r' then output_char oc c
done
-let copy_chunk =
+let copy_chars =
match Sys.os_type with
- "Win32" -> copy_chunk_win32
- | _ -> copy_chunk_unix
+ "Win32" -> copy_chars_win32
+ | _ -> copy_chars_unix
+
+let copy_chunk sourcefile ic oc loc =
+ if loc.start_pos < loc.end_pos then begin
+ fprintf oc "# %d \"%s\"\n" loc.start_line sourcefile;
+ for i = 1 to loc.start_col do output_char oc ' ' done;
+ seek_in ic loc.start_pos;
+ copy_chars ic oc loc.start_pos loc.end_pos
+ end
(* To output an array of short ints, encoded as a string *)
@@ -73,7 +79,7 @@ let output_tables oc tbl =
(* Output the entries *)
-let output_entry ic oc e =
+let output_entry sourcefile ic oc e =
fprintf oc "%s lexbuf = %s_rec lexbuf %d\n"
e.auto_name e.auto_name e.auto_initial_state;
fprintf oc "and %s_rec lexbuf state =\n" e.auto_name;
@@ -82,8 +88,8 @@ let output_entry ic oc e =
List.iter
(fun (num, loc) ->
if !first then first := false else fprintf oc " | ";
- fprintf oc "%d -> (" num;
- copy_chunk ic oc loc;
+ fprintf oc "%d -> (\n" num;
+ copy_chunk sourcefile ic oc loc;
fprintf oc ")\n")
e.auto_actions;
fprintf oc " | n -> lexbuf.Lexing.refill_buff lexbuf; %s_rec lexbuf n\n\n"
@@ -91,7 +97,7 @@ let output_entry ic oc e =
(* Main output function *)
-let output_lexdef ic oc header tables entry_points trailer =
+let output_lexdef sourcefile ic oc header tables entry_points trailer =
Printf.printf "%d states, %d transitions, table size %d bytes\n"
(Array.length tables.tbl_base)
(Array.length tables.tbl_trans)
@@ -99,14 +105,14 @@ let output_lexdef ic oc header tables entry_points trailer =
Array.length tables.tbl_default + Array.length tables.tbl_trans +
Array.length tables.tbl_check));
flush stdout;
- copy_chunk ic oc header;
+ copy_chunk sourcefile ic oc header;
output_tables oc tables;
begin match entry_points with
[] -> ()
| entry1 :: entries ->
- output_string oc "let rec "; output_entry ic oc entry1;
+ output_string oc "let rec "; output_entry sourcefile ic oc entry1;
List.iter
- (fun e -> output_string oc "and "; output_entry ic oc e)
+ (fun e -> output_string oc "and "; output_entry sourcefile ic oc e)
entries
end;
- copy_chunk ic oc trailer
+ copy_chunk sourcefile ic oc trailer
diff --git a/lex/output.mli b/lex/output.mli
index a60c97a24..7df1eb50c 100644
--- a/lex/output.mli
+++ b/lex/output.mli
@@ -14,7 +14,7 @@
(* Output the DFA tables and its entry points *)
val output_lexdef:
- in_channel -> out_channel ->
+ string -> in_channel -> out_channel ->
Syntax.location ->
Compact.lex_tables ->
Lexgen.automata_entry list ->
diff --git a/lex/parser.mly b/lex/parser.mly
index 91d86f1d7..1b629e920 100644
--- a/lex/parser.mly
+++ b/lex/parser.mly
@@ -69,7 +69,7 @@ header:
Taction
{ $1 }
| /*epsilon*/
- { Location(0,0) }
+ { { start_pos = 0; end_pos = 0; start_line = 1; start_col = 0 } }
;
named_regexps:
named_regexps Tlet Tident Tequal regexp
diff --git a/lex/syntax.mli b/lex/syntax.mli
index 6e3025428..0d291bce0 100644
--- a/lex/syntax.mli
+++ b/lex/syntax.mli
@@ -14,7 +14,10 @@
(* The shallow abstract syntax *)
type location =
- Location of int * int
+ { start_pos: int;
+ end_pos: int;
+ start_line: int;
+ start_col: int }
type regular_expression =
Epsilon
diff --git a/yacc/reader.c b/yacc/reader.c
index 09bfba6d8..2b77c53c3 100644
--- a/yacc/reader.c
+++ b/yacc/reader.c
@@ -1203,8 +1203,10 @@ void copy_action(void)
insert_empty_rule();
last_was_action = 1;
- fprintf(f, "(* Rule %d, file %s, line %d *)\n",
+ /*
+ fprintf(f, "(* Rule %d, file %s, line %d *)\n",
nrules-2, input_file_name, lineno);
+ */
if (sflag)
fprintf(f, "yyact.(%d) <- (fun parser_env ->\n", nrules-2);
else
@@ -1216,7 +1218,7 @@ void copy_action(void)
for (i = 1; i <= n; i++) {
item = pitem[nitems + i - n - 1];
if (item->class == TERM && !item->tag) continue;
- fprintf(f, "\tlet dollar__%d = ", i);
+ fprintf(f, "\tlet _%d = ", i);
if (item->tag)
fprintf(f, "(peek_val parser_env %d : %s) in\n", n - i, item->tag);
else if (sflag)
@@ -1224,7 +1226,9 @@ void copy_action(void)
else
fprintf(f, "(peek_val parser_env %d : '%s) in\n", n - i, item->name);
}
- fprintf(f, "\tObj.repr((");
+ fprintf(f, "\tObj.repr((\n");
+ fprintf(f, "# %d \"%s\"\n", lineno, input_file_name);
+ for (i = cptr - line; i >= 0; i--) fputc(' ', f);
depth = 1;
cptr++;
@@ -1243,7 +1247,7 @@ loop:
item = pitem[nitems + i - n - 1];
if (item->class == TERM && !item->tag)
illegal_token_ref(i, item->name);
- fprintf(f, "dollar__%d", i);
+ fprintf(f, "_%d", i);
goto loop;
}
}