diff options
Diffstat (limited to 'ocamlbuild/glob_lexer.mll')
-rw-r--r-- | ocamlbuild/glob_lexer.mll | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/ocamlbuild/glob_lexer.mll b/ocamlbuild/glob_lexer.mll new file mode 100644 index 000000000..8bf49900d --- /dev/null +++ b/ocamlbuild/glob_lexer.mll @@ -0,0 +1,114 @@ +(***********************************************************************) +(* ocamlbuild *) +(* *) +(* Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2007 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id$ *) +(* Original author: Berke Durak *) +(* Glob *) +{ +open Bool;; +open Glob_ast;; + +type token = +| ATOM of pattern atom +| AND +| OR +| NOT +| LPAR +| RPAR +| TRUE +| FALSE +| EOF +;; + +let sf = Printf.sprintf;; + +let concat_patterns p1 p2 = + match (p1,p2) with + | (Epsilon,_) -> p2 + | (_,Epsilon) -> p1 + | (_,_) -> Concat(p1,p2) +;; + +let slash = Class(Atom('/','/'));; +let not_slash = Class(Not(Atom('/','/')));; +let any = Class True;; +} + +let pattern_chars = ['a'-'z']|['A'-'Z']|'_'|'-'|['0'-'9']|'.' +let space_chars = [' ' '\t' '\n' '\r' '\012'] + +rule token = parse +| '<' { ATOM(Pattern(let (p,_) = parse_pattern ['>'] Epsilon lexbuf in p)) } +| '"' { ATOM(Constant(parse_string (Buffer.create 32) lexbuf)) } +| "and"|"AND"|"&" { AND } +| "or"|"OR"|"|" { OR } +| "not"|"NOT"|"~" { NOT } +| "true"|"1" { TRUE } +| "false"|"0" { FALSE } +| "(" { LPAR } +| ")" { RPAR } +| space_chars+ { token lexbuf } +| eof { EOF } + +and parse_pattern eof_chars p = parse +| (pattern_chars+ as u) { parse_pattern eof_chars (concat_patterns p (Word u)) lexbuf } +| '{' + { + let rec loop pl = + let (p',c) = parse_pattern ['}';','] Epsilon lexbuf in + let pl = p' :: pl in + if c = ',' then + loop pl + else + parse_pattern eof_chars (concat_patterns p (Union pl)) lexbuf + in + loop [] + } +| "[^" + { + let cl = Not(Or(parse_class [] lexbuf)) in + parse_pattern eof_chars (concat_patterns p (Class cl)) lexbuf + } +| '[' + { + let cl = Or(parse_class [] lexbuf) in + parse_pattern eof_chars (concat_patterns p (Class cl)) lexbuf + } +| "/**/" (* / | /\Sigma^*/ *) + { let q = Union[slash; Concat(slash, Concat(Star any, slash)) ] in + parse_pattern eof_chars (concat_patterns p q) lexbuf } +| "/**" (* \varepsilon | /\Sigma^* *) + { let q = Union[Epsilon; Concat(slash, Star any)] in + parse_pattern eof_chars (concat_patterns p q) lexbuf } +| "**/" (* \varepsilon | \Sigma^*/ *) + { let q = Union[Epsilon; Concat(Star any, slash)] in + parse_pattern eof_chars (concat_patterns p q) lexbuf } +| "**" { raise (Parse_error("Ambiguous ** pattern not allowed unless surrounded by one or more slashes")) } +| '*' { parse_pattern eof_chars (concat_patterns p (Star not_slash)) lexbuf } +| '/' { parse_pattern eof_chars (concat_patterns p slash) lexbuf } +| '?' { parse_pattern eof_chars (concat_patterns p (Class True)) lexbuf } +| _ as c + { if List.mem c eof_chars then + (p,c) + else + raise (Parse_error(sf "Unexpected character %C in glob pattern" c)) + } + +and parse_string b = parse +| "\"" { Buffer.contents b } +| "\\\"" { Buffer.add_char b '"'; parse_string b lexbuf } +| [^'"' '\\']+ as u { Buffer.add_string b u; parse_string b lexbuf } + +and parse_class cl = parse +| ']' { cl } +| "-]" { ((Atom('-','-'))::cl) } +| (_ as c1) '-' (_ as c2) { parse_class ((Atom(c1,c2))::cl) lexbuf } +| _ as c { parse_class ((Atom(c,c))::cl) lexbuf } |