diff options
Diffstat (limited to 'ocamldoc/odoc_text_lexer.mll')
-rw-r--r-- | ocamldoc/odoc_text_lexer.mll | 521 |
1 files changed, 521 insertions, 0 deletions
{
(***********************************************************************)
(*                             OCamldoc                                *)
(*                                                                     *)
(*            Maxence Guesdon, projet Cristal, INRIA Rocquencourt      *)
(*                                                                     *)
(*  Copyright 2001 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the Q Public License version 1.0.               *)
(*                                                                     *)
(***********************************************************************)

(** The lexer for string to build text structures. *)

open Lexing
open Odoc_text_parser

(** Line counter, updated by [incr_cpts] and reset by [init]. *)
let line_number = ref 0

(** Character counter within the current line, updated by [incr_cpts]
    and reset by [init]. *)
let char_number = ref 0

let string_buffer = Buffer.create 32

(** Reset the string buffer. *)
let reset_string_buffer () = Buffer.reset string_buffer

(** Add a character to the string buffer. *)
let ajout_char_string = Buffer.add_char string_buffer

(** Add a string to the buffer. *)
let ajout_string = Buffer.add_string string_buffer

(** Return the current contents of the string buffer. *)
let lecture_string () = Buffer.contents string_buffer


(** the variable which will contain the description string.
   Is initialized when we encounter the start of a special comment. *)

let description = ref ""

(* NOTE(review): this string is not used by any rule below; the lexer uses
   the named regular expression [blank] defined after the header. It is
   kept because other modules may refer to it. *)
let blank = "[ \013\009\012]"


let print_DEBUG s = print_string s; print_newline ()

(** Number of currently open square brackets inside an inline code string
    (between begin_code and end_code tokens), so that a closing bracket is
    only turned into END_CODE when it matches the opening one. *)
let open_brackets = ref 0

(** this flag indicates if we are in verbatim mode or not, to handle any
    special expression like a string when we are in verbatim mode. *)
let verb_mode = ref false

(** this flag indicates if we are in latex mode or not, to handle any
    special expression like a string when we are in latex mode. *)
let latex_mode = ref false

(** this flag indicates if we are in shortcut list mode or not, to handle
    end_shortcut_list correctly. *)
let shortcut_list_mode = ref false

(** this flag indicates if we are in an element reference. *)
let ele_ref_mode = ref false

(** this flag indicates if we are in a preformatted code string. *)
let code_pre_mode = ref false

(** Reset every mode flag and position counter of the lexer.
    To be called before lexing a new string. *)
let init () =
  open_brackets := 0;
  verb_mode := false;
  latex_mode := false;
  shortcut_list_mode := false;
  ele_ref_mode := false ;
  code_pre_mode := false ;
  line_number := 0 ;
  char_number := 0

(** Regular expression matching a single newline; built once instead of
    on every call to [incr_cpts]. *)
let newline_regexp = Str.regexp_string "\n"

(** Update [line_number] and [char_number] according to the lexeme that
    was just matched in [lexbuf]: without a newline in the lexeme the
    character counter is advanced by the lexeme length, otherwise the line
    counter is advanced by the number of newlines and the character counter
    becomes the length of the text following the last newline. *)
let incr_cpts lexbuf =
  let s = Lexing.lexeme lexbuf in
  let l = Str.split_delim newline_regexp s in
  match List.rev l with
    [] -> () (* should not occur *)
  | [s2] -> (* no newline *)
      char_number := !char_number + (String.length s2)
  | s2 :: _ ->
      line_number := !line_number + ((List.length l) - 1) ;
      char_number := String.length s2

(** [true] when at least one of the verbatim, latex, preformatted code,
    inline code or element reference modes is active. In that case markup
    openers are returned as plain characters. *)
let in_raw_context () =
  !verb_mode || !latex_mode || !code_pre_mode ||
  (!open_brackets >= 1) || !ele_ref_mode

(** [markup_token lexbuf token] returns [token], unless [in_raw_context ()]
    holds, in which case the current lexeme is returned as a [Char]. *)
let markup_token lexbuf token =
  if in_raw_context () then
    Char (Lexing.lexeme lexbuf)
  else
    token

}

(** html marks, to use as alternative possible special strings *)

let html_bold = "<"('b'|'B')">"
let html_end_bold = "</"('b'|'B')">"
let html_italic = "<"('i'|'I')">"
let html_end_italic = "</"('i'|'I')">"
let html_title = "<"('h'|'H')(['0'-'9'])+">"
let html_end_title = "</"('h'|'H')(['0'-'9'])+">"
let html_list = "<"('u'|'U')('l'|'L')">"
let html_end_list = "</"('u'|'U')('l'|'L')">"
let html_enum = "<"('o'|'O')('l'|'L')">"
let html_end_enum = "</"('o'|'O')('l'|'L')">"
let html_item = "<"('l'|'L')('i'|'I')">"
let html_end_item = "</"('l'|'L')('i'|'I')">"
let html_code = "<"('c'|'C')('o'|'O')('d'|'D')('e'|'E')">"
let html_end_code = "</"('c'|'C')('o'|'O')('d'|'D')('e'|'E')">"
let html_center = "<"('c'|'C')('e'|'E')('n'|'N')('t'|'T')('e'|'E')('r'|'R')">"
let html_end_center = "</"('c'|'C')('e'|'E')('n'|'N')('t'|'T')('e'|'E')('r'|'R')">"
let html_left = "<"('l'|'L')('e'|'E')('f'|'F')('t'|'T')">"
let html_end_left = "</"('l'|'L')('e'|'E')('f'|'F')('t'|'T')">"
let html_right = "<"('r'|'R')('i'|'I')('g'|'G')('h'|'H')('t'|'T')">"
let html_end_right = "</"('r'|'R')('i'|'I')('g'|'G')('h'|'H')('t'|'T')">"


let blank = [' ' '\013' '\009' '\012']
let blank_nl = [' ' '\013' '\009' '\012' '\010']
let label = ['a'-'z']+['a'-'z' 'A'-'Z' '0'-'9' '_']*

(** special strings *)

let end = "}"
  | html_end_bold
  | html_end_italic
  | html_end_title
  | html_end_list
  | html_end_enum
  | html_end_item
  | html_end_center
let begin_title =
  ("{" ['0'-'9']+(":"label)? blank_nl)
  | html_title

let begin_bold = "{b"blank_nl | html_bold
let begin_emp = "{e"blank_nl
let begin_center = "{C"blank_nl | html_center
let begin_left = "{L"blank_nl
let begin_right = "{R"blank_nl
let begin_italic = "{i"blank_nl | html_italic
let begin_list = "{ul" | html_list
let begin_enum = "{ol" | html_enum
let begin_item = "{li"blank_nl | "{- " | html_item
let begin_link = "{{:"
let begin_latex = "{%"blank_nl
let end_latex = "%}"
let begin_code = "[" | html_code
let end_code = "]" | html_end_code
let begin_code_pre = "{["
let end_code_pre = "]}"
let begin_verb = "{v"blank_nl
let end_verb = blank_nl"v}"
let begin_ele_ref = "{!"blank_nl | "{!"
let begin_superscript = "{^"blank_nl | "{^"
let begin_subscript = "{_"blank_nl | "{_"

let shortcut_list_item = '\n'blank*"- "
let shortcut_enum_item = '\n'blank*"+ "
let end_shortcut_list = '\n'(blank*'\n')+

rule main = parse
| "\\{"
| "\\}"
| "\\["
| "\\]"
    {
      incr_cpts lexbuf ;
      (* Drop the backslash and return the escaped character itself. *)
      let s = Lexing.lexeme lexbuf in
      Char (String.sub s 1 1)
    }

| end
    {
      incr_cpts lexbuf ;
      if !verb_mode || !latex_mode || !code_pre_mode ||
        (!open_brackets >= 1) then
        Char (Lexing.lexeme lexbuf)
      else
        (
         (* A closing mark also leaves element reference mode. *)
         ele_ref_mode := false;
         END
        )
    }
| begin_title
    {
      incr_cpts lexbuf ;
      if in_raw_context () then
        Char (Lexing.lexeme lexbuf)
      else
        let s = Lexing.lexeme lexbuf in
        try
          (* check if the "{..." or html_title mark was used. *)
          if s.[0] = '<' then
            (* html form: strip the two leading characters and the
               closing one to keep only the digits. *)
            let (n, l) = (2, (String.length s - 3)) in
            let s2 = String.sub s n l in
            Title (int_of_string s2, None)
          else
            (* brace form: strip the opening brace and the trailing
               blank, then look for an optional label after a colon. *)
            let (n, l) = (1, (String.length s - 2)) in
            let s2 = String.sub s n l in
            (try
              let i = String.index s2 ':' in
              let s_n = String.sub s2 0 i in
              let s_label = String.sub s2 (i+1) (l-i-1) in
              Title (int_of_string s_n, Some s_label)
            with
              Not_found ->
                Title (int_of_string s2, None))
        with
          (* Fall back on a level 1 title when the level cannot be read
             as an integer. *)
          Failure _ | Invalid_argument _ ->
            Title (1, None)
    }
| begin_bold
    {
      incr_cpts lexbuf ;
      markup_token lexbuf BOLD
    }
| begin_italic
    {
      incr_cpts lexbuf ;
      markup_token lexbuf ITALIC
    }
| begin_link
    {
      incr_cpts lexbuf ;
      markup_token lexbuf LINK
    }
| begin_emp
    {
      incr_cpts lexbuf ;
      markup_token lexbuf EMP
    }
| begin_superscript
    {
      incr_cpts lexbuf ;
      markup_token lexbuf SUPERSCRIPT
    }
| begin_subscript
    {
      incr_cpts lexbuf ;
      markup_token lexbuf SUBSCRIPT
    }
| begin_center
    {
      incr_cpts lexbuf ;
      markup_token lexbuf CENTER
    }
| begin_left
    {
      incr_cpts lexbuf ;
      markup_token lexbuf LEFT
    }
| begin_right
    {
      incr_cpts lexbuf ;
      markup_token lexbuf RIGHT
    }
| begin_list
    {
      incr_cpts lexbuf ;
      markup_token lexbuf LIST
    }
| begin_enum
    {
      incr_cpts lexbuf ;
      markup_token lexbuf ENUM
    }
| begin_item
    {
      incr_cpts lexbuf ;
      markup_token lexbuf ITEM
    }
| begin_latex
    {
      incr_cpts lexbuf ;
      if in_raw_context () then
        Char (Lexing.lexeme lexbuf)
      else
        (
         latex_mode := true;
         LATEX
        )
    }
| end_latex
    {
      incr_cpts lexbuf ;
      if !verb_mode || (!open_brackets >= 1) || !code_pre_mode ||
        !ele_ref_mode then
        Char (Lexing.lexeme lexbuf)
      else
        (
         latex_mode := false;
         END_LATEX
        )
    }
| begin_code end_code
    {
      (* An empty code string is returned as plain characters. *)
      incr_cpts lexbuf ;
      Char (Lexing.lexeme lexbuf)
    }

| begin_code
    {
      incr_cpts lexbuf ;
      if !verb_mode || !latex_mode || !code_pre_mode || !ele_ref_mode then
        Char (Lexing.lexeme lexbuf)
      else
        if !open_brackets <= 0 then
          (
           open_brackets := 1;
           CODE
          )
        else
          (
           (* Nested opening mark: count it and keep it as text. *)
           incr open_brackets;
           Char (Lexing.lexeme lexbuf)
          )
    }
| end_code
    {
      incr_cpts lexbuf ;
      if !verb_mode || !latex_mode || !code_pre_mode || !ele_ref_mode then
        Char (Lexing.lexeme lexbuf)
      else
        if !open_brackets > 1 then
          (
           (* Closing mark of a nested bracket: keep it as text. *)
           decr open_brackets;
           Char "]"
          )
        else
          (
           open_brackets := 0;
           END_CODE
          )
    }

| begin_code_pre end_code_pre
    {
      (* An empty preformatted code string is returned as plain
         characters. *)
      incr_cpts lexbuf ;
      Char (Lexing.lexeme lexbuf)
    }

| begin_code_pre
    {
      incr_cpts lexbuf ;
      if !verb_mode || !latex_mode || !code_pre_mode || !ele_ref_mode then
        Char (Lexing.lexeme lexbuf)
      else
        (
         code_pre_mode := true;
         CODE_PRE
        )
    }
| end_code_pre
    {
      incr_cpts lexbuf ;
      if !verb_mode || !latex_mode || !ele_ref_mode then
        Char (Lexing.lexeme lexbuf)
      else
        if !code_pre_mode then
          (
           code_pre_mode := false;
           END_CODE_PRE
          )
        else
          Char (Lexing.lexeme lexbuf)
    }

| begin_ele_ref end
    {
      (* An empty element reference is returned as plain characters. *)
      incr_cpts lexbuf ;
      Char (Lexing.lexeme lexbuf)
    }

| begin_ele_ref
    {
      incr_cpts lexbuf ;
      if !verb_mode || !latex_mode || !code_pre_mode ||
        (!open_brackets >= 1) then
        Char (Lexing.lexeme lexbuf)
      else
        if not !ele_ref_mode then
          (
           ele_ref_mode := true;
           ELE_REF
          )
        else
          (
           Char (Lexing.lexeme lexbuf)
          )
    }

| begin_verb
    {
      incr_cpts lexbuf ;
      if !latex_mode || (!open_brackets >= 1) || !code_pre_mode ||
        !ele_ref_mode then
        Char (Lexing.lexeme lexbuf)
      else
        (
         verb_mode := true;
         VERB
        )
    }
| end_verb
    {
      incr_cpts lexbuf ;
      if !latex_mode || (!open_brackets >= 1) || !code_pre_mode ||
        !ele_ref_mode then
        Char (Lexing.lexeme lexbuf)
      else
        (
         verb_mode := false;
         END_VERB
        )
    }

| shortcut_list_item
    {
      incr_cpts lexbuf ;
      if !shortcut_list_mode then
        (
         SHORTCUT_LIST_ITEM
        )
      else
        (
         shortcut_list_mode := true;
         BEGIN_SHORTCUT_LIST_ITEM
        )
    }

| shortcut_enum_item
    {
      incr_cpts lexbuf ;
      if !shortcut_list_mode then
        SHORTCUT_ENUM_ITEM
      else
        (
         shortcut_list_mode := true;
         BEGIN_SHORTCUT_ENUM_ITEM
        )
    }
| end_shortcut_list
    {
      incr_cpts lexbuf ;
      (* go back one char to re-use the last '\n', so we can
         restart another shortcut-list with a single blank line,
         and not two. The line counter is decremented accordingly,
         since that newline will be counted again. *)
      lexbuf.Lexing.lex_abs_pos <- lexbuf.Lexing.lex_abs_pos - 1;
      lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
      lexbuf.Lexing.lex_last_pos <- lexbuf.Lexing.lex_last_pos - 1;
      decr line_number ;
      if !shortcut_list_mode then
        (
         shortcut_list_mode := false;
         END_SHORTCUT_LIST
        )
      else
        BLANK_LINE
    }

| eof { EOF }

| "{"
    {
      incr_cpts lexbuf ;
      (* An opening brace that starts no known markup is an error, except
         in the raw contexts (verbatim included) where it is plain text. *)
      if in_raw_context () then
        Char (Lexing.lexeme lexbuf)
      else
        ERROR
    }
| _
    {
      incr_cpts lexbuf ;
      Char (Lexing.lexeme lexbuf)
    }