summaryrefslogtreecommitdiffstats
path: root/ocamldoc/odoc_ocamlhtml.mll
diff options
context:
space:
mode:
Diffstat (limited to 'ocamldoc/odoc_ocamlhtml.mll')
-rw-r--r--ocamldoc/odoc_ocamlhtml.mll538
1 files changed, 538 insertions, 0 deletions
diff --git a/ocamldoc/odoc_ocamlhtml.mll b/ocamldoc/odoc_ocamlhtml.mll
new file mode 100644
index 000000000..cff408ade
--- /dev/null
+++ b/ocamldoc/odoc_ocamlhtml.mll
@@ -0,0 +1,538 @@
+
+{
+(***********************************************************************)
+(* OCamldoc *)
+(* *)
+(* Maxence Guesdon, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2001 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(** Generation of html code to display OCaml code. *)
+open Lexing
+
+exception Fatal_error
+
+let fatal_error msg =
+ prerr_string ">> Fatal error: "; prerr_endline msg; raise Fatal_error
+
+type error =
+ | Illegal_character of char
+ | Unterminated_comment
+ | Unterminated_string
+ | Unterminated_string_in_comment
+ | Keyword_as_label of string
+;;
+
+exception Error of error * int * int
+
+let base_escape_strings = [
+ ("&", "&") ;
+ ("<", "&lt;") ;
+ (">", "&gt;") ;
+]
+
+let pre_escape_strings = [
+ (" ", "&nbsp;") ;
+ ("\n", "<br>\n") ;
+ ("\t", "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;") ;
+ ]
+
+
+let pre = ref false
+let fmt = ref Format.str_formatter
+
+(** Escape the strings which would clash with html syntax,
+ and some other strings if we want to get a PRE style.*)
+let escape s =
+ List.fold_left
+ (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc)
+ s
+ (if !pre then base_escape_strings @ pre_escape_strings else base_escape_strings)
+
+(** Escape the strings which would clash with html syntax. *)
+let escape_base s =
+ List.fold_left
+ (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc)
+ s
+ base_escape_strings
+
+(** The output functions *)
+
+let print ?(esc=true) s =
+ Format.pp_print_string !fmt (if esc then escape s else s)
+;;
+
+let print_class ?(esc=true) cl s =
+ print ~esc: false ("<span class=\""^cl^"\">"^
+ (if esc then escape s else s)^
+ "</span>")
+;;
+
+(** The table of keywords with colors *)
+let create_hashtable size init =
+ let tbl = Hashtbl.create size in
+ List.iter (fun (key, data) -> Hashtbl.add tbl key data) init;
+ tbl
+
+(** The function used to return html code for the given comment body. *)
+let html_of_comment = ref
+ (fun (s : string) -> "<b>Odoc_ocamlhtml.html_of_comment not initialized</b>")
+
+let keyword_table =
+ create_hashtable 149 [
+ "and", "keyword" ;
+ "as", "keyword" ;
+ "assert", "keyword" ;
+ "begin", "keyword" ;
+ "class", "keyword" ;
+ "constraint", "keyword" ;
+ "do", "keyword" ;
+ "done", "keyword" ;
+ "downto", "keyword" ;
+ "else", "keyword" ;
+ "end", "keyword" ;
+ "exception", "keyword" ;
+ "external", "keyword" ;
+ "false", "keyword" ;
+ "for", "keyword" ;
+ "fun", "keyword" ;
+ "function", "keyword" ;
+ "functor", "keyword" ;
+ "if", "keyword" ;
+ "in", "keyword" ;
+ "include", "keyword" ;
+ "inherit", "keyword" ;
+ "initializer", "keyword" ;
+ "lazy", "keyword" ;
+ "let", "keyword" ;
+ "match", "keyword" ;
+ "method", "keyword" ;
+ "module", "keyword" ;
+ "mutable", "keyword" ;
+ "new", "keyword" ;
+ "object", "keyword" ;
+ "of", "keyword" ;
+ "open", "keyword" ;
+ "or", "keyword" ;
+ "parser", "keyword" ;
+ "private", "keyword" ;
+ "rec", "keyword" ;
+ "sig", "keyword" ;
+ "struct", "keyword" ;
+ "then", "keyword" ;
+ "to", "keyword" ;
+ "true", "keyword" ;
+ "try", "keyword" ;
+ "type", "keyword" ;
+ "val", "keyword" ;
+ "virtual", "keyword" ;
+ "when", "keyword" ;
+ "while", "keyword" ;
+ "with", "keyword" ;
+
+ "mod", "keyword" ;
+ "land", "keyword" ;
+ "lor", "keyword" ;
+ "lxor", "keyword" ;
+ "lsl", "keyword" ;
+ "lsr", "keyword" ;
+ "asr", "keyword" ;
+]
+
+let kwsign_class = "keywordsign"
+let constructor_class = "constructor"
+let comment_class = "comment"
+let string_class = "string"
+let code_class = "code"
+
+
+(** To buffer and print comments *)
+
+
+let margin = ref 0
+
+let comment_buffer = Buffer.create 32
+let reset_comment_buffer () = Buffer.reset comment_buffer
+let store_comment_char = Buffer.add_char comment_buffer
+
+let make_margin () =
+ let rec iter n =
+ if n <= 0 then ""
+ else "&nbsp;"^(iter (n-1))
+ in
+ iter !margin
+
+let print_comment () =
+ let s = Buffer.contents comment_buffer in
+ let len = String.length s in
+ let code =
+ if len < 1 then
+ "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
+ else
+ match s.[0] with
+ '*' ->
+ (
+ try
+ let html = !html_of_comment (String.sub s 1 (len-1)) in
+ "</code><table><tr><td>"^(make_margin ())^"</td><td>"^
+ "<span class=\""^comment_class^"\">"^
+ "(**"^html^"*)"^
+ "</span></td></tr></table><code>"
+ with
+ e ->
+ prerr_endline (Printexc.to_string e);
+ "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
+ )
+ | _ ->
+ "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
+ in
+ print ~esc: false code
+
+(** To buffer string literals *)
+
+let string_buffer = Buffer.create 32
+let reset_string_buffer () = Buffer.reset string_buffer
+let store_string_char = Buffer.add_char string_buffer
+let get_stored_string () =
+ let s = Buffer.contents string_buffer in
+ String.escaped s
+
+(** To translate escape sequences *)
+
+let char_for_backslash =
+ match Sys.os_type with
+ | "Unix" | "Win32" | "Cygwin" ->
+ begin function
+ | 'n' -> '\010'
+ | 'r' -> '\013'
+ | 'b' -> '\008'
+ | 't' -> '\009'
+ | c -> c
+ end
+ | "MacOS" ->
+ begin function
+ | 'n' -> '\013'
+ | 'r' -> '\010'
+ | 'b' -> '\008'
+ | 't' -> '\009'
+ | c -> c
+ end
+ | x -> fatal_error "Lexer: unknown system type"
+
+let char_for_decimal_code lexbuf i =
+ let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
+ 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
+ (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in
+ Char.chr(c land 0xFF)
+
+(** To store the position of the beginning of a string and comment *)
+let string_start_pos = ref 0;;
+let comment_start_pos = ref [];;
+let in_comment () = !comment_start_pos <> [];;
+
+(** Error report *)
+
+open Format
+
+let report_error ppf = function
+ | Illegal_character c ->
+ fprintf ppf "Illegal character (%s)" (Char.escaped c)
+ | Unterminated_comment ->
+ fprintf ppf "Comment not terminated"
+ | Unterminated_string ->
+ fprintf ppf "String literal not terminated"
+ | Unterminated_string_in_comment ->
+ fprintf ppf "This comment contains an unterminated string literal"
+ | Keyword_as_label kwd ->
+ fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
+;;
+
+}
+
+let blank = [' ' '\010' '\013' '\009' '\012']
+let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
+let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
+let identchar =
+ ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
+let symbolchar =
+ ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
+let decimal_literal = ['0'-'9']+
+let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
+let oct_literal = '0' ['o' 'O'] ['0'-'7']+
+let bin_literal = '0' ['b' 'B'] ['0'-'1']+
+let float_literal =
+ ['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)?
+
+rule token = parse
+ blank
+ {
+ let s = Lexing.lexeme lexbuf in
+ (
+ match s with
+ " " -> incr margin
+ | "\t" -> margin := !margin + 8
+ | "\n" -> margin := 0
+ | _ -> ()
+ );
+ print s;
+ token lexbuf
+ }
+ | "_"
+ { print "_" ; token lexbuf }
+ | "~" { print "~" ; token lexbuf }
+ | "~" lowercase identchar * ':'
+ { let s = Lexing.lexeme lexbuf in
+ let name = String.sub s 1 (String.length s - 2) in
+ if Hashtbl.mem keyword_table name then
+ raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
+ Lexing.lexeme_end lexbuf));
+ print s ; token lexbuf }
+ | "?" { print "?" ; token lexbuf }
+ | "?" lowercase identchar * ':'
+ { let s = Lexing.lexeme lexbuf in
+ let name = String.sub s 1 (String.length s - 2) in
+ if Hashtbl.mem keyword_table name then
+ raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
+ Lexing.lexeme_end lexbuf));
+ print s ; token lexbuf }
+ | lowercase identchar *
+ { let s = Lexing.lexeme lexbuf in
+ try
+ let cl = Hashtbl.find keyword_table s in
+ (print_class cl s ; token lexbuf )
+ with Not_found ->
+ (print s ; token lexbuf )}
+ | uppercase identchar *
+ { print_class constructor_class (Lexing.lexeme lexbuf) ; token lexbuf } (* No capitalized keywords *)
+ | decimal_literal | hex_literal | oct_literal | bin_literal
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | float_literal
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "\""
+ { reset_string_buffer();
+ let string_start = Lexing.lexeme_start lexbuf in
+ string_start_pos := string_start;
+ string lexbuf;
+ lexbuf.Lexing.lex_start_pos <-
+ string_start - lexbuf.Lexing.lex_abs_pos;
+ print_class string_class ("\""^(get_stored_string())^"\"") ;
+ token lexbuf }
+ | "'" [^ '\\' '\''] "'"
+ { print_class string_class (Lexing.lexeme lexbuf) ;
+ token lexbuf }
+ | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
+ { print_class string_class (Lexing.lexeme lexbuf ) ;
+ token lexbuf }
+ | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+ { print_class string_class (Lexing.lexeme lexbuf ) ;
+ token lexbuf }
+ | "(*"
+ {
+ reset_comment_buffer ();
+ comment_start_pos := [Lexing.lexeme_start lexbuf];
+ comment lexbuf ;
+ print_comment ();
+ token lexbuf }
+ | "(*)"
+ { reset_comment_buffer ();
+ comment_start_pos := [Lexing.lexeme_start lexbuf];
+ comment lexbuf ;
+ print_comment ();
+ token lexbuf
+ }
+ | "*)"
+ { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
+ print (Lexing.lexeme lexbuf) ;
+ token lexbuf
+ }
+ | "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' | '\r' | "\r\n")
+ (* # linenum ... *)
+ {
+ print (Lexing.lexeme lexbuf);
+ token lexbuf
+ }
+ | "#" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "&&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "`" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "'" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "(" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ")" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "*" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "," { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "??" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "->" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "." { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ".." { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ":" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "::" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ":=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ":>" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ";" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ";;" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "<-" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "[" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "[|" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "[<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "{" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "{<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "|" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "||" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "|]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ">" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ">]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "}" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ">}" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+
+ | "!=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "+" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "-" { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "-." { print (Lexing.lexeme lexbuf) ; token lexbuf }
+
+ | "!" symbolchar *
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ['~' '?'] symbolchar +
+ { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ['=' '<' '>' '|' '&' '$'] symbolchar *
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ['@' '^'] symbolchar *
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ['+' '-'] symbolchar *
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | "**" symbolchar *
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | ['*' '/' '%'] symbolchar *
+ { print (Lexing.lexeme lexbuf) ; token lexbuf }
+ | eof { () }
+ | _
+ { raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]),
+ Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) }
+
+and comment = parse
+ "(*"
+ { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
+ store_comment_char '(';
+ store_comment_char '*';
+ comment lexbuf;
+ }
+ | "*)"
+ { match !comment_start_pos with
+ | [] -> assert false
+ | [x] -> comment_start_pos := []
+ | _ :: l ->
+ store_comment_char '*';
+ store_comment_char ')';
+ comment_start_pos := l;
+ comment lexbuf;
+ }
+ | "\""
+ { reset_string_buffer();
+ string_start_pos := Lexing.lexeme_start lexbuf;
+ store_comment_char '"';
+ begin try string lexbuf
+ with Error (Unterminated_string, _, _) ->
+ let st = List.hd !comment_start_pos in
+ raise (Error (Unterminated_string_in_comment, st, st + 2))
+ end;
+ comment lexbuf }
+ | "''"
+ {
+ store_comment_char '\'';
+ store_comment_char '\'';
+ comment lexbuf }
+ | "'" [^ '\\' '\''] "'"
+ {
+ store_comment_char '\'';
+ store_comment_char (Lexing.lexeme_char lexbuf 1);
+ store_comment_char '\'';
+ comment lexbuf }
+ | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
+ {
+ store_comment_char '\'';
+ store_comment_char '\\';
+ store_comment_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)) ;
+ store_comment_char '\'';
+ comment lexbuf }
+ | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+ {
+ store_comment_char '\'';
+ store_comment_char '\\';
+ store_comment_char(char_for_decimal_code lexbuf 1);
+ store_comment_char '\'';
+ comment lexbuf }
+ | eof
+ { let st = List.hd !comment_start_pos in
+ raise (Error (Unterminated_comment, st, st + 2));
+ }
+ | _
+ { store_comment_char(Lexing.lexeme_char lexbuf 0);
+ comment lexbuf }
+
+and string = parse
+ '"'
+ { () }
+ | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
+ { string lexbuf }
+ | '\\' ['\\' '"' 'n' 't' 'b' 'r']
+ { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
+ string lexbuf }
+ | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
+ { store_string_char(char_for_decimal_code lexbuf 1);
+ string lexbuf }
+ | eof
+ { raise (Error (Unterminated_string,
+ !string_start_pos, !string_start_pos+1)) }
+ | _
+ { store_string_char(Lexing.lexeme_char lexbuf 0);
+ string lexbuf }
+{
+
+let html_of_code ?(with_pre=true) code =
+ let old_pre = !pre in
+ let old_margin = !margin in
+ let old_comment_buffer = Buffer.contents comment_buffer in
+ let old_string_buffer = Buffer.contents string_buffer in
+ let buf = Buffer.create 256 in
+ let old_fmt = !fmt in
+ fmt := Format.formatter_of_buffer buf ;
+ pre := with_pre;
+ margin := 0;
+
+
+ let start = "<span class=\""^code_class^"\"><code>" in
+ let ending = "</code></span>" in
+ let html =
+ (
+ try
+ print ~esc: false start ;
+ let lexbuf = Lexing.from_string code in
+ let _ = token lexbuf in
+ print ~esc: false ending ;
+ Format.pp_print_flush !fmt () ;
+ Buffer.contents buf
+ with
+ _ ->
+ (* flush str_formatter because we already output
+ something in it *)
+ Format.pp_print_flush !fmt () ;
+ start^code^ending
+ )
+ in
+ pre := old_pre;
+ margin := old_margin ;
+ Buffer.reset comment_buffer;
+ Buffer.add_string comment_buffer old_comment_buffer ;
+ Buffer.reset string_buffer;
+ Buffer.add_string string_buffer old_string_buffer ;
+ fmt := old_fmt ;
+
+ html
+
+}