diff options
author | Damien Doligez <damien.doligez-inria.fr> | 2011-11-29 15:18:41 +0000 |
---|---|---|
committer | Damien Doligez <damien.doligez-inria.fr> | 2011-11-29 15:18:41 +0000 |
commit | 07a128aea15e212b4d8ff48fef925266a9dc968c (patch) | |
tree | 7a1376cc6d60c07a760eca61328ba0eb03fc0858 | |
parent | a3aad303bec6f93550b71f8ab5677308b4bbaac5 (diff) |
correct fontification for strings and comments
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@11289 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | emacs/caml-font.el | 341 |
1 files changed, 304 insertions, 37 deletions
diff --git a/emacs/caml-font.el b/emacs/caml-font.el index 09d1ae701..425c07622 100644 --- a/emacs/caml-font.el +++ b/emacs/caml-font.el @@ -1,30 +1,5 @@ -;(***********************************************************************) -;(* *) -;(* OCaml *) -;(* *) -;(* Jacques Garrigue and Ian T Zimmerman *) -;(* *) -;(* Copyright 1997 Institut National de Recherche en Informatique et *) -;(* en Automatique. All rights reserved. This file is distributed *) -;(* under the terms of the GNU General Public License. *) -;(* *) -;(***********************************************************************) - ;; caml-font: font-lock support for OCaml files -;; -;; rewrite and clean-up. -;; Changes: -;; - fontify strings and comments using syntactic font lock -;; - define a `font-lock-syntactic-face-function' to fontify ocamldoc comments -;; - fontify infix operators like mod, land, lsl, etc. -;; - fontify line number directives -;; - fontify "failwith" and "invalid_arg" like "raise" -;; - fontify '\x..' character constants -;; - use the regexp-opt function to build regexps (more readable) -;; - use backquote and comma in sexp (more readable) -;; - drop the `caml-quote-char' variable (I don't use caml-light :)) -;; - stop doing weird things with faces - +;; now with perfect parsing of comments and strings (require 'font-lock) @@ -48,9 +23,6 @@ (defconst caml-font-lock-keywords `( -;character literals - ("'\\(.\\|\\\\\\([ntbr\"'\\\\]\\|[0-9]\\{3\\}\\|x[0-9A-Fa-f]\\{2\\}\\)\\)'" - . font-lock-string-face) ;modules and constructors ("`?\\<[A-Z][A-Za-z0-9_']*\\>" . font-lock-function-name-face) ;definition @@ -99,14 +71,298 @@ ((looking-at "(\\*\\*[^*]") 'caml-font-doccomment-face) (t 'font-lock-comment-face))))))) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; In order to correctly fontify an OCaml buffer, it is necessary to +; lex the buffer to tell what is a comment and what is a string. +; We do this incrementally in a hook +; (font-lock-extend-after-change-region-function), which is called +; whenever the buffer changes. It sets the syntax-table property +; on each beginning and end of chars, strings, and comments. + +; This mode handles correctly all the strange cases in the following +; OCaml code. +; +; let l' _ = ();; +; let _' _ = ();; +; let l' = ();; +; let b2_' = ();; +; let a'a' = ();; +; let f2 _ _ = ();; +; let f3 _ _ _ = ();; +; let f' _ _ _ _ _ = ();; +; let hello = ();; +; +; (* ==== easy stuff ==== *) +; +; (* a comment *) +; (* "a string" in a comment *) +; (* "another string *)" in a comment *) +; (* not a string '"' in a comment *) +; "a string";; +; '"';; (* not a string *) +; +; (* ==== hard stuff ==== *) +; +; l'"' not not a string ";; +; _'"' also not not a string";; +; f2 0l'"';; (* not not not a string *) +; f2 0_'"';; (* also not not not a string *) +; f3 0.0l'"' not not not not a string ";; +; f3 0.0_'"';; (* not not not not not a string *) +; f2 0b01_'"';; (* not not not a string *) +; f3 0b2_'"' not not not not a string ";; +; f3 0b02_'"';; (* not not not not not a string *) +; '\'';; (* a char *) +; ' +; ';; (* a char *) +; '^M +; ';; (* also a char [replace ^M with one CR character] *) +; a'a';; (* not a char *) +; type ' +; a' t = X;; (* also not a char *) +; +; (* ==== far-out stuff ==== *) +; +; f'"'" "*) print_endline "hello";;(* \"" ;; +; (* f'"'" "*) print_endline "hello";;(* \"" ;; *) + + +(defconst caml-font-ident-re + "[A-Za-z_\300-\326\330-\366\370-\377][A-Za-z_\300-\326\330-\366\370-\377'0-9]*" +) + +(defconst caml-font-int-re + "\\(0[xX][0-9A-Fa-f][0-9A-Fa-f_]*\\|0[oO][0-7][0-7_]*\\|0[bB][01][01_]*\\)[lLn]?" +) + +; decimal integers are folded into the RE for floats to get longest-match +; without using posix-looking-at +(defconst caml-font-decimal-re + "[0-9][0-9_]*\\([lLn]\\|\\.[0-9_]*\\)?\\([eE][+-]?[0-9][0-9_]*\\)?" +) + +; match any ident or numeral token +(defconst caml-font-ident-or-num-re + (concat caml-font-ident-re "\\|" caml-font-int-re "\\|" caml-font-decimal-re) +) + +; match any char token +(defconst caml-font-char-re + "'\\(\015\012\\|[^\\']\\|\\(\\\\\\([\\'\"ntbr ]\\|[0-9][0-9][0-9]\\|x[0-9A-Fa-f][0-9A-Fa-f]\\)\\)\\)'" +) + +; match a quote followed by a newline +(defconst caml-font-quote-newline-re + "'\\(\015\012\\|[\012\015]\\)" +) + +; match any token or sequence of tokens that cannot contain a +; quote, double quote, a start of comment, or a newline +; note: this is only to go faster than one character at a time +(defconst caml-font-other-re + "[^A-Za-z_0-9\012\015\300-\326\330-\366\370-\377'\"(]+" +) + +; match any sequence of non-special characters in a comment +; note: this is only to go faster than one character at a time +(defconst caml-font-other-comment-re + "[^(*\"'\012\015]+" +) + +; match any sequence of non-special characters in a string +; note: this is only to go faster than one character at a time +(defconst caml-font-other-string-re + "[^\\\"\012\015]" +) + +; match a newline +(defconst caml-font-newline-re + "\\(\015\012\\|[\012\015]\\)" +) + +; Put the 'caml-font-state property with the given state on the +; character before pos. Return nil if it was already there, t if not. +(defun caml-font-put-state (pos state) + (if (equal state (get-text-property (1- pos) 'caml-font-state)) + nil + (put-text-property (1- pos) pos 'caml-font-state state) + t) +) + +; Same as looking-at, but erase properties 'caml-font-state and +; 'syntax-table from the matched range +(defun caml-font-looking-at (re) + (let ((result (looking-at re))) + (when result + (remove-text-properties (match-beginning 0) (match-end 0) + '(syntax-table nil caml-font-state nil))) + result) +) + +; Annotate the buffer starting at point in state (st . depth) +; Set the 'syntax-table property on beginnings and ends of: +; - strings +; - chars +; - comments +; Also set the 'caml-font-state property on each LF character that is +; not preceded by a single quote. The property gives the state of the +; lexer (nil or t) after reading that character. + +; Leave the point at a point where the pre-existing 'caml-font-state +; property is consistent with the new parse, or at the end of the buffer. + +; depth is the depth of nested comments at this point +; it must be a non-negative integer +; st can be: +; nil -- we are in the base state +; t -- we are within a string + +(defun caml-font-annotate (st depth) + (let ((continue t)) + (while (and continue (not (eobp))) + (cond + ((and (equal st nil) (= depth 0)) ; base state, outside comment + (cond + ((caml-font-looking-at caml-font-ident-or-num-re) + (goto-char (match-end 0))) + ((caml-font-looking-at caml-font-char-re) + (put-text-property (point) (1+ (point)) + 'syntax-table (string-to-syntax "|")) + (put-text-property (1- (match-end 0)) (match-end 0) + 'syntax-table (string-to-syntax "|")) + (goto-char (match-end 0))) + ((caml-font-looking-at caml-font-quote-newline-re) + (goto-char (match-end 0))) + ((caml-font-looking-at "\"") + (put-text-property (point) (1+ (point)) + 'syntax-table (string-to-syntax "|")) + (goto-char (match-end 0)) + (setq st t)) + ((caml-font-looking-at "(\\*") + (put-text-property (point) (1+ (point)) + 'syntax-table (string-to-syntax "!")) + (goto-char (match-end 0)) + (setq depth 1)) + ((looking-at caml-font-newline-re) + (goto-char (match-end 0)) + (setq continue (caml-font-put-state (match-end 0) '(nil . 0)))) + ((caml-font-looking-at caml-font-other-re) + (goto-char (match-end 0))) + (t + (remove-text-properties (point) (1+ (point)) + '(syntax-table nil caml-font-state nil)) + (goto-char (1+ (point)))))) + ((equal st nil) ; base state inside comment + (cond + ((caml-font-looking-at "(\\*") + (goto-char (match-end 0)) + (setq depth (1+ depth))) + ((caml-font-looking-at "\\*)") + (goto-char (match-end 0)) + (setq depth (1- depth)) + (when (= depth 0) + (put-text-property (1- (point)) (point) + 'syntax-table (string-to-syntax "!")))) + ((caml-font-looking-at "\"") + (goto-char (match-end 0)) + (setq st t)) + ((caml-font-looking-at caml-font-char-re) + (goto-char (match-end 0))) + ((caml-font-looking-at caml-font-quote-newline-re) + (goto-char (match-end 0))) + ((caml-font-looking-at "''") + (goto-char (match-end 0))) + ((looking-at caml-font-newline-re) + (goto-char (match-end 0)) + (setq continue (caml-font-put-state (match-end 0) (cons nil depth)))) + ((caml-font-looking-at caml-font-other-comment-re) + (goto-char (match-end 0))) + (t + (remove-text-properties (point) (1+ (point)) + '(syntax-table nil caml-font-state nil)) + (goto-char (1+ (point)))))) + (t ; string state inside or outside a comment + (cond + ((caml-font-looking-at "\"") + (when (= depth 0) + (put-text-property (point) (1+ (point)) + 'syntax-table (string-to-syntax "|"))) + (goto-char (1+ (point))) + (setq st nil)) + ((caml-font-looking-at "\\\\[\"\\]") + (goto-char (match-end 0))) + ((looking-at caml-font-newline-re) + (goto-char (match-end 0)) + (setq continue (caml-font-put-state (match-end 0) (cons t depth)))) + ((caml-font-looking-at caml-font-other-string-re) + (goto-char (match-end 0))) + (t + (remove-text-properties (point) (1+ (point)) + '(syntax-table nil caml-font-state nil)) + (goto-char (1+ (point))))))))) +) + +; This is the hook function for font-lock-extend-after-change-function +; It finds the nearest saved state at the left of the changed text, +; calls caml-font-annotate to set the 'caml-font-state and 'syntax-table +; properties, then returns the range that was parsed by caml-font-annotate. +(defun caml-font-extend-after-change (beg end &optional old-len) + (save-excursion + (save-match-data + (let ((caml-font-modified (buffer-modified-p)) + start-at + end-at + state) + (remove-text-properties beg end '(syntax-table nil caml-font-state nil)) + (setq start-at + (or (and (> beg (point-min)) + (get-text-property (1- beg) 'caml-font-state) + beg) + (previous-single-property-change beg 'caml-font-state) + (point-min))) + (setq state (or (and (> start-at (point-min)) + (get-text-property (1- start-at) 'caml-font-state)) + (cons nil 0))) + (goto-char start-at) + (caml-font-annotate (car state) (cdr state)) + (setq end-at (point)) + (restore-buffer-modified-p caml-font-modified) + (cons start-at end-at)))) +) + +; We don't use the normal caml-mode syntax table because it contains an +; approximation of strings and comments that interferes with our +; annotations. +(defconst caml-font-syntax-table + (let ((tbl (make-syntax-table))) + (modify-syntax-entry ?' "w" tbl) + (modify-syntax-entry ?_ "w" tbl) + (modify-syntax-entry ?\" "." tbl) + (modify-syntax-entry '(?\300 . ?\326) "w" tbl) + (modify-syntax-entry '(?\330 . ?\366) "w" tbl) + (modify-syntax-entry '(?\370 . ?\377) "w" tbl) + tbl)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; font-lock commands are similar for caml-mode and inferior-caml-mode (defun caml-font-set-font-lock () + (setq parse-sexp-lookup-properties t) (setq font-lock-defaults - '(caml-font-lock-keywords - nil nil nil nil - (font-lock-syntactic-face-function . caml-font-syntactic-face))) - (font-lock-mode 1)) + (list + 'caml-font-lock-keywords ; keywords + nil ; keywords-only + nil ; case-fold + nil ; syntax-alist + nil ; syntax-begin + (cons 'font-lock-syntax-table caml-font-syntax-table) + '(font-lock-extend-after-change-region-function + . caml-font-extend-after-change) + '(font-lock-syntactic-face-function . caml-font-syntactic-face) + )) + (caml-font-extend-after-change (point-min) (point-max) 0) + (font-lock-mode 1) +) (add-hook 'caml-mode-hook 'caml-font-set-font-lock) @@ -116,11 +372,22 @@ ,@caml-font-lock-keywords)) (defun inferior-caml-set-font-lock () + (setq parse-sexp-lookup-properties t) (setq font-lock-defaults - '(inferior-caml-font-lock-keywords - nil nil nil nil - (font-lock-syntactic-face-function . caml-font-syntactic-face))) - (font-lock-mode 1)) + (list + 'inferior-caml-font-lock-keywords ; keywords + nil ; keywords-only + nil ; case-fold + nil ; syntax-alist + nil ; syntax-begin + (cons 'font-lock-syntax-table caml-font-syntax-table) + '(font-lock-extend-after-change-region-function + . caml-font-extend-after-change) + '(font-lock-syntactic-face-function . caml-font-syntactic-face) + )) + (caml-font-extend-after-change (point-min) (point-max) 0) + (font-lock-mode 1) +) (add-hook 'inferior-caml-mode-hooks 'inferior-caml-set-font-lock) (provide 'caml-font) |