diff options
-rw-r--r-- | lex/lexer.mli | 5 | ||||
-rw-r--r-- | lex/lexer.mll | 65 | ||||
-rw-r--r-- | lex/main.ml | 19 | ||||
-rw-r--r-- | lex/output.ml | 36 | ||||
-rw-r--r-- | lex/output.mli | 2 | ||||
-rw-r--r-- | lex/parser.mly | 2 | ||||
-rw-r--r-- | lex/syntax.mli | 5 | ||||
-rw-r--r-- | yacc/reader.c | 12 |
8 files changed, 97 insertions, 49 deletions
diff --git a/lex/lexer.mli b/lex/lexer.mli index eda622fa9..da09bc794 100644 --- a/lex/lexer.mli +++ b/lex/lexer.mli @@ -13,4 +13,7 @@ val main: Lexing.lexbuf -> Parser.token -exception Lexical_error of string +exception Lexical_error of string * int * int + +val line_num: int ref +val line_start_pos: int ref diff --git a/lex/lexer.mll b/lex/lexer.mll index 9e8b109c4..cbfbf011d 100644 --- a/lex/lexer.mll +++ b/lex/lexer.mll @@ -22,7 +22,7 @@ open Parser let brace_depth = ref 0 and comment_depth = ref 0 -exception Lexical_error of string +exception Lexical_error of string * int * int let initial_string_buffer = String.create 256 let string_buff = ref initial_string_buffer @@ -56,14 +56,28 @@ let char_for_decimal_code lexbuf i = 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48)) +let line_num = ref 1 +let line_start_pos = ref 0 + +let handle_lexical_error fn lexbuf = + let line = !line_num + and column = Lexing.lexeme_start lexbuf - !line_start_pos in + try + fn lexbuf + with Lexical_error(msg, _, _) -> + raise(Lexical_error(msg, line, column)) } rule main = parse - [' ' '\010' '\013' '\009' '\012' ] + + [' ' '\013' '\009' '\012' ] + { main lexbuf } + | '\010' + { line_start_pos := Lexing.lexeme_end lexbuf; + incr line_num; + main lexbuf } | "(*" { comment_depth := 1; - comment lexbuf; + handle_lexical_error comment lexbuf; main lexbuf } | ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] * { match Lexing.lexeme lexbuf with @@ -75,7 +89,7 @@ rule main = parse | s -> Tident s } | '"' { reset_string_buffer(); - string lexbuf; + handle_lexical_error string lexbuf; Tstring(get_stored_string()) } | "'" [^ '\\'] "'" { Tchar(Char.code(Lexing.lexeme_char lexbuf 1)) } @@ -84,10 +98,13 @@ rule main = parse | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" { Tchar(Char.code(char_for_decimal_code lexbuf 2)) } | '{' - { let n1 = Lexing.lexeme_end lexbuf in - brace_depth := 1; - let n2 = action lexbuf in - Taction(Location(n1, n2)) } + { let n1 = Lexing.lexeme_end lexbuf + and l1 = !line_num + and s1 = !line_start_pos in + brace_depth := 1; + let n2 = handle_lexical_error action lexbuf in + Taction({start_pos = n1; end_pos = n2; + start_line = l1; start_col = n1 - s1}) } | '=' { Tequal } | '|' { Tor } | '_' { Tunderscore } @@ -103,7 +120,8 @@ rule main = parse | eof { Tend } | _ { raise(Lexical_error - ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf))) } + ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf), + !line_num, Lexing.lexeme_start lexbuf - !line_start_pos)) } and action = parse '{' @@ -111,7 +129,7 @@ and action = parse action lexbuf } | '}' { decr brace_depth; - if !brace_depth == 0 then Lexing.lexeme_start lexbuf else action lexbuf } + if !brace_depth = 0 then Lexing.lexeme_start lexbuf else action lexbuf } | '"' { reset_string_buffer(); string lexbuf; @@ -128,15 +146,21 @@ and action = parse comment lexbuf; action lexbuf } | eof - { raise (Lexical_error "unterminated action") } + { raise (Lexical_error("unterminated action", 0, 0)) } + | '\010' + { line_start_pos := Lexing.lexeme_end lexbuf; + incr line_num; + action lexbuf } | _ { action lexbuf } and string = parse '"' { () } - | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + - { string lexbuf } + | '\\' [' ' '\013' '\009' '\012'] * '\010' [' ' '\013' '\009' '\012'] * + { line_start_pos := Lexing.lexeme_end lexbuf; + incr line_num; + string lexbuf } | '\\' ['\\' '"' 'n' 't' 'b' 'r'] { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); string lexbuf } @@ -144,7 +168,12 @@ and string = parse { store_string_char(char_for_decimal_code lexbuf 1); string lexbuf } | eof - { raise(Lexical_error "unterminated string") } + { raise(Lexical_error("unterminated string", 0, 0)) } + | '\010' + { store_string_char '\010'; + line_start_pos := Lexing.lexeme_end lexbuf; + incr line_num; + string lexbuf } | _ { store_string_char(Lexing.lexeme_char lexbuf 0); string lexbuf } @@ -154,7 +183,7 @@ and comment = parse { incr comment_depth; comment lexbuf } | "*)" { decr comment_depth; - if !comment_depth == 0 then () else comment lexbuf } + if !comment_depth = 0 then () else comment lexbuf } | '"' { reset_string_buffer(); string lexbuf; @@ -169,6 +198,10 @@ and comment = parse | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" { comment lexbuf } | eof - { raise(Lexical_error "unterminated comment") } + { raise(Lexical_error("unterminated comment", 0, 0)) } + | '\010' + { line_start_pos := Lexing.lexeme_end lexbuf; + incr line_num; + comment lexbuf } | _ { comment lexbuf } diff --git a/lex/main.ml b/lex/main.ml index cd7d02e6c..a56041602 100644 --- a/lex/main.ml +++ b/lex/main.ml @@ -39,21 +39,20 @@ let main () = Sys.remove dest_name; begin match exn with Parsing.Parse_error -> - prerr_string "Syntax error around char "; - prerr_int (Lexing.lexeme_start lexbuf); - prerr_endline "." - | Lexer.Lexical_error s -> - prerr_string "Lexical error around char "; - prerr_int (Lexing.lexeme_start lexbuf); - prerr_string ": "; - prerr_string s; - prerr_endline "." + Printf.fprintf stderr + "File \"%s\", line %d, character %d: syntax error.\n" + source_name !Lexer.line_num + (Lexing.lexeme_start lexbuf - !Lexer.line_start_pos) + | Lexer.Lexical_error(msg, line, col) -> + Printf.fprintf stderr + "File \"%s\", line %d, character %d: %s.\n" + source_name line col msg | _ -> raise exn end; exit 2 in let (entries, transitions) = Lexgen.make_dfa def in let tables = Compact.compact_tables transitions in - Output.output_lexdef ic oc def.header tables entries def.trailer; + Output.output_lexdef source_name ic oc def.header tables entries def.trailer; close_in ic; close_out oc diff --git a/lex/output.ml b/lex/output.ml index c5426905b..9e2504bcd 100644 --- a/lex/output.ml +++ b/lex/output.ml @@ -22,8 +22,7 @@ open Compact let copy_buffer = String.create 1024 -let copy_chunk_unix ic oc (Location(start,stop)) = - seek_in ic start; +let copy_chars_unix ic oc start stop = let n = ref (stop - start) in while !n > 0 do let m = input ic copy_buffer 0 (min !n 1024) in @@ -31,17 +30,24 @@ let copy_chunk_unix ic oc (Location(start,stop)) = n := !n - m done -let copy_chunk_win32 ic oc (Location(start,stop)) = - seek_in ic start; +let copy_chars_win32 ic oc start stop = for i = start to stop - 1 do let c = input_char ic in if c <> '\r' then output_char oc c done -let copy_chunk = +let copy_chars = match Sys.os_type with - "Win32" -> copy_chunk_win32 - | _ -> copy_chunk_unix + "Win32" -> copy_chars_win32 + | _ -> copy_chars_unix + +let copy_chunk sourcefile ic oc loc = + if loc.start_pos < loc.end_pos then begin + fprintf oc "# %d \"%s\"\n" loc.start_line sourcefile; + for i = 1 to loc.start_col do output_char oc ' ' done; + seek_in ic loc.start_pos; + copy_chars ic oc loc.start_pos loc.end_pos + end (* To output an array of short ints, encoded as a string *) @@ -73,7 +79,7 @@ let output_tables oc tbl = (* Output the entries *) -let output_entry ic oc e = +let output_entry sourcefile ic oc e = fprintf oc "%s lexbuf = %s_rec lexbuf %d\n" e.auto_name e.auto_name e.auto_initial_state; fprintf oc "and %s_rec lexbuf state =\n" e.auto_name; @@ -82,8 +88,8 @@ let output_entry ic oc e = List.iter (fun (num, loc) -> if !first then first := false else fprintf oc " | "; - fprintf oc "%d -> (" num; - copy_chunk ic oc loc; + fprintf oc "%d -> (\n" num; + copy_chunk sourcefile ic oc loc; fprintf oc ")\n") e.auto_actions; fprintf oc " | n -> lexbuf.Lexing.refill_buff lexbuf; %s_rec lexbuf n\n\n" @@ -91,7 +97,7 @@ let output_entry ic oc e = (* Main output function *) -let output_lexdef ic oc header tables entry_points trailer = +let output_lexdef sourcefile ic oc header tables entry_points trailer = Printf.printf "%d states, %d transitions, table size %d bytes\n" (Array.length tables.tbl_base) (Array.length tables.tbl_trans) @@ -99,14 +105,14 @@ let output_lexdef ic oc header tables entry_points trailer = Array.length tables.tbl_default + Array.length tables.tbl_trans + Array.length tables.tbl_check)); flush stdout; - copy_chunk ic oc header; + copy_chunk sourcefile ic oc header; output_tables oc tables; begin match entry_points with [] -> () | entry1 :: entries -> - output_string oc "let rec "; output_entry ic oc entry1; + output_string oc "let rec "; output_entry sourcefile ic oc entry1; List.iter - (fun e -> output_string oc "and "; output_entry ic oc e) + (fun e -> output_string oc "and "; output_entry sourcefile ic oc e) entries end; - copy_chunk ic oc trailer + copy_chunk sourcefile ic oc trailer diff --git a/lex/output.mli b/lex/output.mli index a60c97a24..7df1eb50c 100644 --- a/lex/output.mli +++ b/lex/output.mli @@ -14,7 +14,7 @@ (* Output the DFA tables and its entry points *) val output_lexdef: - in_channel -> out_channel -> + string -> in_channel -> out_channel -> Syntax.location -> Compact.lex_tables -> Lexgen.automata_entry list -> diff --git a/lex/parser.mly b/lex/parser.mly index 91d86f1d7..1b629e920 100644 --- a/lex/parser.mly +++ b/lex/parser.mly @@ -69,7 +69,7 @@ header: Taction { $1 } | /*epsilon*/ - { Location(0,0) } + { { start_pos = 0; end_pos = 0; start_line = 1; start_col = 0 } } ; named_regexps: named_regexps Tlet Tident Tequal regexp diff --git a/lex/syntax.mli b/lex/syntax.mli index 6e3025428..0d291bce0 100644 --- a/lex/syntax.mli +++ b/lex/syntax.mli @@ -14,7 +14,10 @@ (* The shallow abstract syntax *) type location = - Location of int * int + { start_pos: int; + end_pos: int; + start_line: int; + start_col: int } type regular_expression = Epsilon diff --git a/yacc/reader.c b/yacc/reader.c index 09bfba6d8..2b77c53c3 100644 --- a/yacc/reader.c +++ b/yacc/reader.c @@ -1203,8 +1203,10 @@ void copy_action(void) insert_empty_rule(); last_was_action = 1; - fprintf(f, "(* Rule %d, file %s, line %d *)\n", + /* + fprintf(f, "(* Rule %d, file %s, line %d *)\n", nrules-2, input_file_name, lineno); + */ if (sflag) fprintf(f, "yyact.(%d) <- (fun parser_env ->\n", nrules-2); else @@ -1216,7 +1218,7 @@ void copy_action(void) for (i = 1; i <= n; i++) { item = pitem[nitems + i - n - 1]; if (item->class == TERM && !item->tag) continue; - fprintf(f, "\tlet dollar__%d = ", i); + fprintf(f, "\tlet _%d = ", i); if (item->tag) fprintf(f, "(peek_val parser_env %d : %s) in\n", n - i, item->tag); else if (sflag) @@ -1224,7 +1226,9 @@ void copy_action(void) else fprintf(f, "(peek_val parser_env %d : '%s) in\n", n - i, item->name); } - fprintf(f, "\tObj.repr(("); + fprintf(f, "\tObj.repr((\n"); + fprintf(f, "# %d \"%s\"\n", lineno, input_file_name); + for (i = cptr - line; i >= 0; i--) fputc(' ', f); depth = 1; cptr++; @@ -1243,7 +1247,7 @@ loop: item = pitem[nitems + i - n - 1]; if (item->class == TERM && !item->tag) illegal_token_ref(i, item->name); - fprintf(f, "dollar__%d", i); + fprintf(f, "_%d", i); goto loop; } } |