diff options
Diffstat (limited to 'ocamldoc/odoc_ocamlhtml.mll')
-rw-r--r-- | ocamldoc/odoc_ocamlhtml.mll | 538 |
1 files changed, 538 insertions, 0 deletions
diff --git a/ocamldoc/odoc_ocamlhtml.mll b/ocamldoc/odoc_ocamlhtml.mll new file mode 100644 index 000000000..cff408ade --- /dev/null +++ b/ocamldoc/odoc_ocamlhtml.mll @@ -0,0 +1,538 @@ + +{ +(***********************************************************************) +(* OCamldoc *) +(* *) +(* Maxence Guesdon, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 2001 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(** Generation of html code to display OCaml code. *) +open Lexing + +exception Fatal_error + +let fatal_error msg = + prerr_string ">> Fatal error: "; prerr_endline msg; raise Fatal_error + +type error = + | Illegal_character of char + | Unterminated_comment + | Unterminated_string + | Unterminated_string_in_comment + | Keyword_as_label of string +;; + +exception Error of error * int * int + +let base_escape_strings = [ + ("&", "&") ; + ("<", "<") ; + (">", ">") ; +] + +let pre_escape_strings = [ + (" ", " ") ; + ("\n", "<br>\n") ; + ("\t", " ") ; + ] + + +let pre = ref false +let fmt = ref Format.str_formatter + +(** Escape the strings which would clash with html syntax, + and some other strings if we want to get a PRE style.*) +let escape s = + List.fold_left + (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc) + s + (if !pre then base_escape_strings @ pre_escape_strings else base_escape_strings) + +(** Escape the strings which would clash with html syntax. *) +let escape_base s = + List.fold_left + (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc) + s + base_escape_strings + +(** The output functions *) + +let print ?(esc=true) s = + Format.pp_print_string !fmt (if esc then escape s else s) +;; + +let print_class ?(esc=true) cl s = + print ~esc: false ("<span class=\""^cl^"\">"^ + (if esc then escape s else s)^ + "</span>") +;; + +(** The table of keywords with colors *) +let create_hashtable size init = + let tbl = Hashtbl.create size in + List.iter (fun (key, data) -> Hashtbl.add tbl key data) init; + tbl + +(** The function used to return html code for the given comment body. *) +let html_of_comment = ref + (fun (s : string) -> "<b>Odoc_ocamlhtml.html_of_comment not initialized</b>") + +let keyword_table = + create_hashtable 149 [ + "and", "keyword" ; + "as", "keyword" ; + "assert", "keyword" ; + "begin", "keyword" ; + "class", "keyword" ; + "constraint", "keyword" ; + "do", "keyword" ; + "done", "keyword" ; + "downto", "keyword" ; + "else", "keyword" ; + "end", "keyword" ; + "exception", "keyword" ; + "external", "keyword" ; + "false", "keyword" ; + "for", "keyword" ; + "fun", "keyword" ; + "function", "keyword" ; + "functor", "keyword" ; + "if", "keyword" ; + "in", "keyword" ; + "include", "keyword" ; + "inherit", "keyword" ; + "initializer", "keyword" ; + "lazy", "keyword" ; + "let", "keyword" ; + "match", "keyword" ; + "method", "keyword" ; + "module", "keyword" ; + "mutable", "keyword" ; + "new", "keyword" ; + "object", "keyword" ; + "of", "keyword" ; + "open", "keyword" ; + "or", "keyword" ; + "parser", "keyword" ; + "private", "keyword" ; + "rec", "keyword" ; + "sig", "keyword" ; + "struct", "keyword" ; + "then", "keyword" ; + "to", "keyword" ; + "true", "keyword" ; + "try", "keyword" ; + "type", "keyword" ; + "val", "keyword" ; + "virtual", "keyword" ; + "when", "keyword" ; + "while", "keyword" ; + "with", "keyword" ; + + "mod", "keyword" ; + "land", "keyword" ; + "lor", "keyword" ; + "lxor", "keyword" ; + "lsl", "keyword" ; + "lsr", "keyword" ; + "asr", "keyword" ; +] + +let kwsign_class = "keywordsign" +let constructor_class = "constructor" +let comment_class = "comment" +let string_class = "string" +let code_class = "code" + + +(** To buffer and print comments *) + + +let margin = ref 0 + +let comment_buffer = Buffer.create 32 +let reset_comment_buffer () = Buffer.reset comment_buffer +let store_comment_char = Buffer.add_char comment_buffer + +let make_margin () = + let rec iter n = + if n <= 0 then "" + else " "^(iter (n-1)) + in + iter !margin + +let print_comment () = + let s = Buffer.contents comment_buffer in + let len = String.length s in + let code = + if len < 1 then + "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>" + else + match s.[0] with + '*' -> + ( + try + let html = !html_of_comment (String.sub s 1 (len-1)) in + "</code><table><tr><td>"^(make_margin ())^"</td><td>"^ + "<span class=\""^comment_class^"\">"^ + "(**"^html^"*)"^ + "</span></td></tr></table><code>" + with + e -> + prerr_endline (Printexc.to_string e); + "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>" + ) + | _ -> + "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>" + in + print ~esc: false code + +(** To buffer string literals *) + +let string_buffer = Buffer.create 32 +let reset_string_buffer () = Buffer.reset string_buffer +let store_string_char = Buffer.add_char string_buffer +let get_stored_string () = + let s = Buffer.contents string_buffer in + String.escaped s + +(** To translate escape sequences *) + +let char_for_backslash = + match Sys.os_type with + | "Unix" | "Win32" | "Cygwin" -> + begin function + | 'n' -> '\010' + | 'r' -> '\013' + | 'b' -> '\008' + | 't' -> '\009' + | c -> c + end + | "MacOS" -> + begin function + | 'n' -> '\013' + | 'r' -> '\010' + | 'b' -> '\008' + | 't' -> '\009' + | c -> c + end + | x -> fatal_error "Lexer: unknown system type" + +let char_for_decimal_code lexbuf i = + let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + + 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + + (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in + Char.chr(c land 0xFF) + +(** To store the position of the beginning of a string and comment *) +let string_start_pos = ref 0;; +let comment_start_pos = ref [];; +let in_comment () = !comment_start_pos <> [];; + +(** Error report *) + +open Format + +let report_error ppf = function + | Illegal_character c -> + fprintf ppf "Illegal character (%s)" (Char.escaped c) + | Unterminated_comment -> + fprintf ppf "Comment not terminated" + | Unterminated_string -> + fprintf ppf "String literal not terminated" + | Unterminated_string_in_comment -> + fprintf ppf "This comment contains an unterminated string literal" + | Keyword_as_label kwd -> + fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd +;; + +} + +let blank = [' ' '\010' '\013' '\009' '\012'] +let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] +let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] +let identchar = + ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9'] +let symbolchar = + ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~'] +let decimal_literal = ['0'-'9']+ +let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+ +let oct_literal = '0' ['o' 'O'] ['0'-'7']+ +let bin_literal = '0' ['b' 'B'] ['0'-'1']+ +let float_literal = + ['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)? + +rule token = parse + blank + { + let s = Lexing.lexeme lexbuf in + ( + match s with + " " -> incr margin + | "\t" -> margin := !margin + 8 + | "\n" -> margin := 0 + | _ -> () + ); + print s; + token lexbuf + } + | "_" + { print "_" ; token lexbuf } + | "~" { print "~" ; token lexbuf } + | "~" lowercase identchar * ':' + { let s = Lexing.lexeme lexbuf in + let name = String.sub s 1 (String.length s - 2) in + if Hashtbl.mem keyword_table name then + raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf, + Lexing.lexeme_end lexbuf)); + print s ; token lexbuf } + | "?" { print "?" ; token lexbuf } + | "?" lowercase identchar * ':' + { let s = Lexing.lexeme lexbuf in + let name = String.sub s 1 (String.length s - 2) in + if Hashtbl.mem keyword_table name then + raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf, + Lexing.lexeme_end lexbuf)); + print s ; token lexbuf } + | lowercase identchar * + { let s = Lexing.lexeme lexbuf in + try + let cl = Hashtbl.find keyword_table s in + (print_class cl s ; token lexbuf ) + with Not_found -> + (print s ; token lexbuf )} + | uppercase identchar * + { print_class constructor_class (Lexing.lexeme lexbuf) ; token lexbuf } (* No capitalized keywords *) + | decimal_literal | hex_literal | oct_literal | bin_literal + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | float_literal + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "\"" + { reset_string_buffer(); + let string_start = Lexing.lexeme_start lexbuf in + string_start_pos := string_start; + string lexbuf; + lexbuf.Lexing.lex_start_pos <- + string_start - lexbuf.Lexing.lex_abs_pos; + print_class string_class ("\""^(get_stored_string())^"\"") ; + token lexbuf } + | "'" [^ '\\' '\''] "'" + { print_class string_class (Lexing.lexeme lexbuf) ; + token lexbuf } + | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" + { print_class string_class (Lexing.lexeme lexbuf ) ; + token lexbuf } + | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" + { print_class string_class (Lexing.lexeme lexbuf ) ; + token lexbuf } + | "(*" + { + reset_comment_buffer (); + comment_start_pos := [Lexing.lexeme_start lexbuf]; + comment lexbuf ; + print_comment (); + token lexbuf } + | "(*)" + { reset_comment_buffer (); + comment_start_pos := [Lexing.lexeme_start lexbuf]; + comment lexbuf ; + print_comment (); + token lexbuf + } + | "*)" + { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1; + print (Lexing.lexeme lexbuf) ; + token lexbuf + } + | "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' | '\r' | "\r\n") + (* # linenum ... *) + { + print (Lexing.lexeme lexbuf); + token lexbuf + } + | "#" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "&&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "`" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "'" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "(" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ")" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "*" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "," { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "??" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "->" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "." { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ".." { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ":" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "::" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ":=" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ":>" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ";" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ";;" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "<" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "<-" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "=" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "[" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "[|" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "[<" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "]" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "{" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "{<" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "|" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "||" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | "|]" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ">" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ">]" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "}" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ">}" { print (Lexing.lexeme lexbuf) ; token lexbuf } + + | "!=" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "+" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "-" { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "-." { print (Lexing.lexeme lexbuf) ; token lexbuf } + + | "!" symbolchar * + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ['~' '?'] symbolchar + + { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } + | ['=' '<' '>' '|' '&' '$'] symbolchar * + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ['@' '^'] symbolchar * + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ['+' '-'] symbolchar * + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | "**" symbolchar * + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | ['*' '/' '%'] symbolchar * + { print (Lexing.lexeme lexbuf) ; token lexbuf } + | eof { () } + | _ + { raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]), + Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) } + +and comment = parse + "(*" + { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos; + store_comment_char '('; + store_comment_char '*'; + comment lexbuf; + } + | "*)" + { match !comment_start_pos with + | [] -> assert false + | [x] -> comment_start_pos := [] + | _ :: l -> + store_comment_char '*'; + store_comment_char ')'; + comment_start_pos := l; + comment lexbuf; + } + | "\"" + { reset_string_buffer(); + string_start_pos := Lexing.lexeme_start lexbuf; + store_comment_char '"'; + begin try string lexbuf + with Error (Unterminated_string, _, _) -> + let st = List.hd !comment_start_pos in + raise (Error (Unterminated_string_in_comment, st, st + 2)) + end; + comment lexbuf } + | "''" + { + store_comment_char '\''; + store_comment_char '\''; + comment lexbuf } + | "'" [^ '\\' '\''] "'" + { + store_comment_char '\''; + store_comment_char (Lexing.lexeme_char lexbuf 1); + store_comment_char '\''; + comment lexbuf } + | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'" + { + store_comment_char '\''; + store_comment_char '\\'; + store_comment_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)) ; + store_comment_char '\''; + comment lexbuf } + | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" + { + store_comment_char '\''; + store_comment_char '\\'; + store_comment_char(char_for_decimal_code lexbuf 1); + store_comment_char '\''; + comment lexbuf } + | eof + { let st = List.hd !comment_start_pos in + raise (Error (Unterminated_comment, st, st + 2)); + } + | _ + { store_comment_char(Lexing.lexeme_char lexbuf 0); + comment lexbuf } + +and string = parse + '"' + { () } + | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] * + { string lexbuf } + | '\\' ['\\' '"' 'n' 't' 'b' 'r'] + { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); + string lexbuf } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] + { store_string_char(char_for_decimal_code lexbuf 1); + string lexbuf } + | eof + { raise (Error (Unterminated_string, + !string_start_pos, !string_start_pos+1)) } + | _ + { store_string_char(Lexing.lexeme_char lexbuf 0); + string lexbuf } +{ + +let html_of_code ?(with_pre=true) code = + let old_pre = !pre in + let old_margin = !margin in + let old_comment_buffer = Buffer.contents comment_buffer in + let old_string_buffer = Buffer.contents string_buffer in + let buf = Buffer.create 256 in + let old_fmt = !fmt in + fmt := Format.formatter_of_buffer buf ; + pre := with_pre; + margin := 0; + + + let start = "<span class=\""^code_class^"\"><code>" in + let ending = "</code></span>" in + let html = + ( + try + print ~esc: false start ; + let lexbuf = Lexing.from_string code in + let _ = token lexbuf in + print ~esc: false ending ; + Format.pp_print_flush !fmt () ; + Buffer.contents buf + with + _ -> + (* flush str_formatter because we already output + something in it *) + Format.pp_print_flush !fmt () ; + start^code^ending + ) + in + pre := old_pre; + margin := old_margin ; + Buffer.reset comment_buffer; + Buffer.add_string comment_buffer old_comment_buffer ; + Buffer.reset string_buffer; + Buffer.add_string string_buffer old_string_buffer ; + fmt := old_fmt ; + + html + +} |